From d3b8f889a220aed825accc28eb64ce283a0d51ac Mon Sep 17 00:00:00 2001 From: john stultz Date: Mon, 17 Aug 2009 16:40:47 -0700 Subject: x86: Make tsc=reliable override boot time stability checks This patch makes the tsc=reliable option disable the boot time stability checks. Currently the option only disables the runtime watchdog checks. This change allows folks who want to override the boot time TSC stability checks and use the TSC when the system would otherwise disqualify it. There still are some situations that the TSC will be disqualified, such as cpufreq scaling. But these are situations where the box will hang if allowed. Patch also includes a fix for an issue found by Thomas Gleixner, where the TSC disqualification message wouldn't be printed after a call to unsynchronized_tsc(). Signed-off-by: John Stultz Cc: Andrew Morton Cc: akataria@vmware.com Cc: Stephen Hemminger LKML-Reference: <1250552447.7212.92.camel@localhost.localdomain> Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 71f4368b357e..648fb269e5d1 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -825,6 +825,9 @@ __cpuinit int unsynchronized_tsc(void) if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) return 0; + + if (tsc_clocksource_reliable) + return 0; /* * Intel systems are normally all synchronized. * Exceptions must mark TSC as unstable: @@ -832,10 +835,10 @@ __cpuinit int unsynchronized_tsc(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { /* assume multi socket systems are not synchronized: */ if (num_possible_cpus() > 1) - tsc_unstable = 1; + return 1; } - return tsc_unstable; + return 0; } static void __init init_tsc_clocksource(void) -- cgit v1.2.2 From 5a7ae78fd478624df3059cb6f55056b85d074acc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 Oct 2010 10:46:28 -0700 Subject: x86: Allow platforms to force enable apic Some embedded x86 platforms don't setup the APIC in the BIOS/bootloader and would be forced to add "lapic" on the kernel command line. That's a bit akward. Split out the force enable code from detect_init_APIC() and allow platform code to call it from the platform setup. That avoids the command line parameter and possible replication of the MSR dance in the force enable code. Signed-off-by: Thomas Gleixner LKML-Reference: <1287510389-8388-1-git-send-email-dirk.brandewie@gmail.com> Signed-off-by: Dirk Brandewie --- arch/x86/kernel/apic/apic.c | 87 ++++++++++++++++++++++++++++----------------- 1 file changed, 54 insertions(+), 33 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 850657d1b0ed..463839645f9b 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1531,13 +1531,60 @@ static int __init detect_init_APIC(void) return 0; } #else + +static int apic_verify(void) +{ + u32 features, h, l; + + /* + * The APIC feature bit should now be enabled + * in `cpuid' + */ + features = cpuid_edx(1); + if (!(features & (1 << X86_FEATURE_APIC))) { + pr_warning("Could not enable APIC!\n"); + return -1; + } + set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + + /* The BIOS may have set up the APIC at some other address */ + rdmsr(MSR_IA32_APICBASE, l, h); + if (l & MSR_IA32_APICBASE_ENABLE) + mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; + + pr_info("Found and enabled local APIC!\n"); + return 0; +} + +int apic_force_enable(void) +{ + u32 h, l; + + if (disable_apic) + return -1; + + /* + * Some BIOSes disable the local APIC in the APIC_BASE + * MSR. This can only be done in software for Intel P6 or later + * and AMD K7 (Model > 1) or later. + */ + rdmsr(MSR_IA32_APICBASE, l, h); + if (!(l & MSR_IA32_APICBASE_ENABLE)) { + pr_info("Local APIC disabled by BIOS -- reenabling.\n"); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; + wrmsr(MSR_IA32_APICBASE, l, h); + enabled_via_apicbase = 1; + } + return apic_verify(); +} + /* * Detect and initialize APIC */ static int __init detect_init_APIC(void) { - u32 h, l, features; - /* Disabled by kernel option? */ if (disable_apic) return -1; @@ -1567,38 +1614,12 @@ static int __init detect_init_APIC(void) "you can enable it with \"lapic\"\n"); return -1; } - /* - * Some BIOSes disable the local APIC in the APIC_BASE - * MSR. This can only be done in software for Intel P6 or later - * and AMD K7 (Model > 1) or later. - */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (!(l & MSR_IA32_APICBASE_ENABLE)) { - pr_info("Local APIC disabled by BIOS -- reenabling.\n"); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; - wrmsr(MSR_IA32_APICBASE, l, h); - enabled_via_apicbase = 1; - } - } - /* - * The APIC feature bit should now be enabled - * in `cpuid' - */ - features = cpuid_edx(1); - if (!(features & (1 << X86_FEATURE_APIC))) { - pr_warning("Could not enable APIC!\n"); - return -1; + if (apic_force_enable()) + return -1; + } else { + if (apic_verify()) + return -1; } - set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* The BIOS may have set up the APIC at some other address */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (l & MSR_IA32_APICBASE_ENABLE) - mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; - - pr_info("Found and enabled local APIC!\n"); apic_pm_activate(); -- cgit v1.2.2 From 23f9b267159b4c7ff59d2e6c8ed31693eff841e3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Oct 2010 15:38:50 -0700 Subject: x86: apic: Move probe_nr_irqs_gsi() into ioapic_init_mappings() probe_br_irqs_gsi() is called right after ioapic_init_mappings() and there are no other users. Move it into ioapic_init_mappings() so the declaration can disappear and the function can become static. Rename ioapic_init_mappings() to ioapic_and_gsi_init() to reflect that change. Signed-off-by: Thomas Gleixner LKML-Reference: <1287510389-8388-2-git-send-email-dirk.brandewie@gmail.com> Signed-off-by: Dirk Brandewie --- arch/x86/kernel/apic/io_apic.c | 6 ++++-- arch/x86/kernel/setup.c | 5 +---- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8ae808d110f4..ce3c6fb4f357 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3639,7 +3639,7 @@ int __init io_apic_get_redir_entries (int ioapic) return reg_01.bits.entries + 1; } -void __init probe_nr_irqs_gsi(void) +static void __init probe_nr_irqs_gsi(void) { int nr; @@ -3951,7 +3951,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics) return res; } -void __init ioapic_init_mappings(void) +void __init ioapic_and_gsi_init(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; struct resource *ioapic_res; @@ -3989,6 +3989,8 @@ fake_ioapic_page: ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; ioapic_res++; } + + probe_nr_irqs_gsi(); } void __init ioapic_insert_resources(void) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 420e64197850..b8982e0fc0c2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1017,10 +1017,7 @@ void __init setup_arch(char **cmdline_p) #endif init_apic_mappings(); - ioapic_init_mappings(); - - /* need to wait for io_apic is mapped */ - probe_nr_irqs_gsi(); + ioapic_and_gsi_init(); kvm_guest_init(); -- cgit v1.2.2 From 9c37c9d89773ee9da9f6af28ee37d931bd045711 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 25 Oct 2010 16:03:35 +0200 Subject: mce, amd: Implement mce_threshold_block_init() helper function This patch adds a helper function for the initial setup of an mce threshold block. The LVT offset is passed as argument. Also making variable threshold_defaults local as it is only used in function mce_amd_feature_init(). Function threshold_restart_bank() is extended to setup the LVT offset, the change is backward compatible. Thus, now there is only a single wrmsrl() to setup the block. Signed-off-by: Robert Richter Acked-by: Borislav Petkov LKML-Reference: <1288015419-29543-2-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 48 ++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 80c482382d5c..f438318ee800 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -59,12 +59,6 @@ struct threshold_block { struct list_head miscj; }; -/* defaults used early on boot */ -static struct threshold_block threshold_defaults = { - .interrupt_enable = 0, - .threshold_limit = THRESHOLD_MAX, -}; - struct threshold_bank { struct kobject *kobj; struct threshold_block *blocks; @@ -89,6 +83,8 @@ static void amd_threshold_interrupt(void); struct thresh_restart { struct threshold_block *b; int reset; + int set_lvt_off; + int lvt_off; u16 old_limit; }; @@ -116,6 +112,12 @@ static void threshold_restart_bank(void *_tr) (new_count & THRESHOLD_MAX); } + if (tr->set_lvt_off) { + /* set new lvt offset */ + mci_misc_hi &= ~MASK_LVTOFF_HI; + mci_misc_hi |= tr->lvt_off << 20; + } + tr->b->interrupt_enable ? (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : (mci_misc_hi &= ~MASK_INT_TYPE_HI); @@ -124,13 +126,25 @@ static void threshold_restart_bank(void *_tr) wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); } +static void mce_threshold_block_init(struct threshold_block *b, int offset) +{ + struct thresh_restart tr = { + .b = b, + .set_lvt_off = 1, + .lvt_off = offset, + }; + + b->threshold_limit = THRESHOLD_MAX; + threshold_restart_bank(&tr); +}; + /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { + struct threshold_block b; unsigned int cpu = smp_processor_id(); u32 low = 0, high = 0, address = 0; unsigned int bank, block; - struct thresh_restart tr; int lvt_off = -1; u8 offset; @@ -186,16 +200,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) continue; } - high &= ~MASK_LVTOFF_HI; - high |= lvt_off << 20; - wrmsr(address, low, high); - - threshold_defaults.address = address; - tr.b = &threshold_defaults; - tr.reset = 0; - tr.old_limit = 0; - threshold_restart_bank(&tr); + memset(&b, 0, sizeof(b)); + b.cpu = cpu; + b.bank = bank; + b.block = block; + b.address = address; + mce_threshold_block_init(&b, offset); mce_threshold_vector = amd_threshold_interrupt; } } @@ -298,9 +309,8 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size) b->interrupt_enable = !!new; + memset(&tr, 0, sizeof(tr)); tr.b = b; - tr.reset = 0; - tr.old_limit = 0; smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); @@ -321,10 +331,10 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size) if (new < 1) new = 1; + memset(&tr, 0, sizeof(tr)); tr.old_limit = b->threshold_limit; b->threshold_limit = new; tr.b = b; - tr.reset = 0; smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); -- cgit v1.2.2 From 7203a0494084541575bac6dfc4e153f9e28869b8 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 25 Oct 2010 16:03:36 +0200 Subject: mce, amd: Shorten local variables mci_misc_{hi,lo} Shorten this variables to make later changes more readable. Signed-off-by: Robert Richter Acked-by: Borislav Petkov LKML-Reference: <1288015419-29543-3-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index f438318ee800..eb771b9fc0cb 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -93,37 +93,37 @@ struct thresh_restart { static void threshold_restart_bank(void *_tr) { struct thresh_restart *tr = _tr; - u32 mci_misc_hi, mci_misc_lo; + u32 hi, lo; - rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi); + rdmsr(tr->b->address, lo, hi); - if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) + if (tr->b->threshold_limit < (hi & THRESHOLD_MAX)) tr->reset = 1; /* limit cannot be lower than err count */ if (tr->reset) { /* reset err count and overflow bit */ - mci_misc_hi = - (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | + hi = + (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | (THRESHOLD_MAX - tr->b->threshold_limit); } else if (tr->old_limit) { /* change limit w/o reset */ - int new_count = (mci_misc_hi & THRESHOLD_MAX) + + int new_count = (hi & THRESHOLD_MAX) + (tr->old_limit - tr->b->threshold_limit); - mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | + hi = (hi & ~MASK_ERR_COUNT_HI) | (new_count & THRESHOLD_MAX); } if (tr->set_lvt_off) { /* set new lvt offset */ - mci_misc_hi &= ~MASK_LVTOFF_HI; - mci_misc_hi |= tr->lvt_off << 20; + hi &= ~MASK_LVTOFF_HI; + hi |= tr->lvt_off << 20; } tr->b->interrupt_enable ? - (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : - (mci_misc_hi &= ~MASK_INT_TYPE_HI); + (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : + (hi &= ~MASK_INT_TYPE_HI); - mci_misc_hi |= MASK_COUNT_EN_HI; - wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); + hi |= MASK_COUNT_EN_HI; + wrmsr(tr->b->address, lo, hi); } static void mce_threshold_block_init(struct threshold_block *b, int offset) -- cgit v1.2.2 From bbaff08dca3c34d0fb6b4c4051354184e33e3df8 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 25 Oct 2010 16:03:37 +0200 Subject: mce, amd: Add helper functions to setup APIC This patch reworks and cleans up mce_amd_feature_init() by introducing helper functions to setup and check the LVT offset. It also fixes line endings in pr_err() calls. Signed-off-by: Robert Richter Acked-by: Borislav Petkov LKML-Reference: <1288015419-29543-4-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 67 ++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 29 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index eb771b9fc0cb..e316684f9ed7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -31,8 +31,6 @@ #include #include -#define PFX "mce_threshold: " -#define VERSION "version 1.1.1" #define NR_BANKS 6 #define NR_BLOCKS 9 #define THRESHOLD_MAX 0xFFF @@ -88,6 +86,27 @@ struct thresh_restart { u16 old_limit; }; +static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) +{ + int msr = (hi & MASK_LVTOFF_HI) >> 20; + + if (apic < 0) { + pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt " + "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, + b->bank, b->block, b->address, hi, lo); + return 0; + } + + if (apic != msr) { + pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d " + "for bank %d, block %d (MSR%08X=0x%x%08x)\n", + b->cpu, apic, b->bank, b->block, b->address, hi, lo); + return 0; + } + + return 1; +}; + /* must be called with correct cpu affinity */ /* Called via smp_call_function_single() */ static void threshold_restart_bank(void *_tr) @@ -113,9 +132,11 @@ static void threshold_restart_bank(void *_tr) } if (tr->set_lvt_off) { - /* set new lvt offset */ - hi &= ~MASK_LVTOFF_HI; - hi |= tr->lvt_off << 20; + if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) { + /* set new lvt offset */ + hi &= ~MASK_LVTOFF_HI; + hi |= tr->lvt_off << 20; + } } tr->b->interrupt_enable ? @@ -138,6 +159,15 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset) threshold_restart_bank(&tr); }; +static int setup_APIC_mce(int reserved, int new) +{ + if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR, + APIC_EILVT_MSG_FIX, 0)) + return new; + + return reserved; +} + /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { @@ -145,8 +175,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) unsigned int cpu = smp_processor_id(); u32 low = 0, high = 0, address = 0; unsigned int bank, block; - int lvt_off = -1; - u8 offset; + int offset = -1; for (bank = 0; bank < NR_BANKS; ++bank) { for (block = 0; block < NR_BLOCKS; ++block) { @@ -177,28 +206,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) if (shared_bank[bank] && c->cpu_core_id) break; #endif - offset = (high & MASK_LVTOFF_HI) >> 20; - if (lvt_off < 0) { - if (setup_APIC_eilvt(offset, - THRESHOLD_APIC_VECTOR, - APIC_EILVT_MSG_FIX, 0)) { - pr_err(FW_BUG "cpu %d, failed to " - "setup threshold interrupt " - "for bank %d, block %d " - "(MSR%08X=0x%x%08x)", - smp_processor_id(), bank, block, - address, high, low); - continue; - } - lvt_off = offset; - } else if (lvt_off != offset) { - pr_err(FW_BUG "cpu %d, invalid threshold " - "interrupt offset %d for bank %d," - "block %d (MSR%08X=0x%x%08x)", - smp_processor_id(), lvt_off, bank, - block, address, high, low); - continue; - } + offset = setup_APIC_mce(offset, + (high & MASK_LVTOFF_HI) >> 20); memset(&b, 0, sizeof(b)); b.cpu = cpu; -- cgit v1.2.2 From 0a17941e71f089b128514f7b5b486e20072ca7dc Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 25 Oct 2010 16:03:38 +0200 Subject: mce, amd: Remove goto in threshold_create_device() Removing the goto in threshold_create_device(). Signed-off-by: Robert Richter Acked-by: Borislav Petkov LKML-Reference: <1288015419-29543-5-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index e316684f9ed7..5bf2fac52aca 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -622,9 +622,9 @@ static __cpuinit int threshold_create_device(unsigned int cpu) continue; err = threshold_create_bank(cpu, bank); if (err) - goto out; + return err; } -out: + return err; } -- cgit v1.2.2 From eb48c9cb2053e7bb5f7f8f0371cb578a0d439450 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 25 Oct 2010 16:03:39 +0200 Subject: apic, amd: Make firmware bug messages more meaningful This improves error messages in case the BIOS was setting up wrong LVT offsets. Signed-off-by: Robert Richter Acked-by: Borislav Petkov LKML-Reference: <1288015419-29543-6-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 850657d1b0ed..cb1304856a5c 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -433,17 +433,18 @@ int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask) reserved = reserve_eilvt_offset(offset, new); if (reserved != new) { - pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but " - "vector 0x%x was already reserved by another core, " - "APIC%lX=0x%x\n", - smp_processor_id(), new, reserved, reg, old); + pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " + "vector 0x%x, but the register is already in use for " + "vector 0x%x on another cpu\n", + smp_processor_id(), reg, offset, new, reserved); return -EINVAL; } if (!eilvt_entry_is_changeable(old, new)) { - pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but " - "register already in use, APIC%lX=0x%x\n", - smp_processor_id(), new, reg, old); + pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " + "vector 0x%x, but the register is already in use for " + "vector 0x%x on this cpu\n", + smp_processor_id(), reg, offset, new, old); return -EBUSY; } -- cgit v1.2.2 From 1ea6be212eea5ce1e8fabadacb0c639ad87b2f00 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Mon, 1 Nov 2010 22:44:34 +0100 Subject: x86, microcode, AMD: Replace vmalloc+memset with vzalloc We don't have to do memset() ourselves after vmalloc() when we have vzalloc(), so change that in arch/x86/kernel/microcode_amd.c::get_next_ucode(). Signed-off-by: Jesper Juhl Signed-off-by: Borislav Petkov --- arch/x86/kernel/microcode_amd.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index e1af7c055c7d..383d4f8ec9e1 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -183,16 +183,17 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) return NULL; } - mc = vmalloc(UCODE_MAX_SIZE); - if (mc) { - memset(mc, 0, UCODE_MAX_SIZE); - if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, - total_size)) { - vfree(mc); - mc = NULL; - } else - *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; + mc = vzalloc(UCODE_MAX_SIZE); + if (!mc) + return NULL; + + if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size)) { + vfree(mc); + mc = NULL; + } else { + *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; } + return mc; } -- cgit v1.2.2 From c7657ac0c3e4d4ab569296911164b7a2b0ff871a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 1 Nov 2010 23:36:53 +0100 Subject: x86, microcode, AMD: Cleanup code a bit get_ucode_data is a memcpy() wrapper which always returns 0. Move it into the header and make it an inline. Remove all code checking its return value and turn it into a void. There should be no functionality change resulting from this patch. Signed-off-by: Borislav Petkov --- arch/x86/kernel/microcode_amd.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 383d4f8ec9e1..15831336bda8 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -155,12 +155,6 @@ static int apply_microcode_amd(int cpu) return 0; } -static int get_ucode_data(void *to, const u8 *from, size_t n) -{ - memcpy(to, from, n); - return 0; -} - static void * get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) { @@ -168,8 +162,7 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; void *mc; - if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR)) - return NULL; + get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR); if (section_hdr[0] != UCODE_UCODE_TYPE) { pr_err("error: invalid type field in container file section header\n"); @@ -187,12 +180,8 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) if (!mc) return NULL; - if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size)) { - vfree(mc); - mc = NULL; - } else { - *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; - } + get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size); + *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; return mc; } @@ -203,8 +192,7 @@ static int install_equiv_cpu_table(const u8 *buf) unsigned int *buf_pos = (unsigned int *)container_hdr; unsigned long size; - if (get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE)) - return 0; + get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE); size = buf_pos[2]; @@ -220,10 +208,7 @@ static int install_equiv_cpu_table(const u8 *buf) } buf += UCODE_CONTAINER_HEADER_SIZE; - if (get_ucode_data(equiv_cpu_table, buf, size)) { - vfree(equiv_cpu_table); - return 0; - } + get_ucode_data(equiv_cpu_table, buf, size); return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ } -- cgit v1.2.2 From c5cbac69422a9bffe7c7fd9a115130e272b547f5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Nov 2010 10:35:51 -0800 Subject: x86, cpu: Rename verify_cpu_64.S to verify_cpu.S The code is 32bit already, and can be used in 32bit routines. Signed-off-by: Kees Cook LKML-Reference: <1289414154-7829-2-git-send-email-kees.cook@canonical.com> Acked-by: Pekka Enberg Acked-by: Alan Cox Signed-off-by: H. Peter Anvin --- arch/x86/kernel/trampoline_64.S | 2 +- arch/x86/kernel/verify_cpu.S | 106 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/verify_cpu_64.S | 106 ---------------------------------------- 3 files changed, 107 insertions(+), 107 deletions(-) create mode 100644 arch/x86/kernel/verify_cpu.S delete mode 100644 arch/x86/kernel/verify_cpu_64.S (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 3af2dff58b21..075d130efcf9 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S @@ -127,7 +127,7 @@ startup_64: no_longmode: hlt jmp no_longmode -#include "verify_cpu_64.S" +#include "verify_cpu.S" # Careful these need to be in the same 64K segment as the above; tidt: diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S new file mode 100644 index 000000000000..56a8c2a867d9 --- /dev/null +++ b/arch/x86/kernel/verify_cpu.S @@ -0,0 +1,106 @@ +/* + * + * verify_cpu.S - Code for cpu long mode and SSE verification. This + * code has been borrowed from boot/setup.S and was introduced by + * Andi Kleen. + * + * Copyright (c) 2007 Andi Kleen (ak@suse.de) + * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com) + * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com) + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + * + * This is a common code for verification whether CPU supports + * long mode and SSE or not. It is not called directly instead this + * file is included at various places and compiled in that context. + * Following are the current usage. + * + * This file is included by both 16bit and 32bit code. + * + * arch/x86_64/boot/setup.S : Boot cpu verification (16bit) + * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit) + * arch/x86_64/kernel/trampoline.S: secondary processor verfication (16bit) + * arch/x86_64/kernel/acpi/wakeup.S:Verfication at resume (16bit) + * + * verify_cpu, returns the status of cpu check in register %eax. + * 0: Success 1: Failure + * + * The caller needs to check for the error code and take the action + * appropriately. Either display a message or halt. + */ + +#include +#include + +verify_cpu: + pushfl # Save caller passed flags + pushl $0 # Kill any dangerous flags + popfl + + pushfl # standard way to check for cpuid + popl %eax + movl %eax,%ebx + xorl $0x200000,%eax + pushl %eax + popfl + pushfl + popl %eax + cmpl %eax,%ebx + jz verify_cpu_no_longmode # cpu has no cpuid + + movl $0x0,%eax # See if cpuid 1 is implemented + cpuid + cmpl $0x1,%eax + jb verify_cpu_no_longmode # no cpuid 1 + + xor %di,%di + cmpl $0x68747541,%ebx # AuthenticAMD + jnz verify_cpu_noamd + cmpl $0x69746e65,%edx + jnz verify_cpu_noamd + cmpl $0x444d4163,%ecx + jnz verify_cpu_noamd + mov $1,%di # cpu is from AMD + +verify_cpu_noamd: + movl $0x1,%eax # Does the cpu have what it takes + cpuid + andl $REQUIRED_MASK0,%edx + xorl $REQUIRED_MASK0,%edx + jnz verify_cpu_no_longmode + + movl $0x80000000,%eax # See if extended cpuid is implemented + cpuid + cmpl $0x80000001,%eax + jb verify_cpu_no_longmode # no extended cpuid + + movl $0x80000001,%eax # Does the cpu have what it takes + cpuid + andl $REQUIRED_MASK1,%edx + xorl $REQUIRED_MASK1,%edx + jnz verify_cpu_no_longmode + +verify_cpu_sse_test: + movl $1,%eax + cpuid + andl $SSE_MASK,%edx + cmpl $SSE_MASK,%edx + je verify_cpu_sse_ok + test %di,%di + jz verify_cpu_no_longmode # only try to force SSE on AMD + movl $MSR_K7_HWCR,%ecx + rdmsr + btr $15,%eax # enable SSE + wrmsr + xor %di,%di # don't loop + jmp verify_cpu_sse_test # try again + +verify_cpu_no_longmode: + popfl # Restore caller passed flags + movl $1,%eax + ret +verify_cpu_sse_ok: + popfl # Restore caller passed flags + xorl %eax, %eax + ret diff --git a/arch/x86/kernel/verify_cpu_64.S b/arch/x86/kernel/verify_cpu_64.S deleted file mode 100644 index 56a8c2a867d9..000000000000 --- a/arch/x86/kernel/verify_cpu_64.S +++ /dev/null @@ -1,106 +0,0 @@ -/* - * - * verify_cpu.S - Code for cpu long mode and SSE verification. This - * code has been borrowed from boot/setup.S and was introduced by - * Andi Kleen. - * - * Copyright (c) 2007 Andi Kleen (ak@suse.de) - * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com) - * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com) - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. - * - * This is a common code for verification whether CPU supports - * long mode and SSE or not. It is not called directly instead this - * file is included at various places and compiled in that context. - * Following are the current usage. - * - * This file is included by both 16bit and 32bit code. - * - * arch/x86_64/boot/setup.S : Boot cpu verification (16bit) - * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit) - * arch/x86_64/kernel/trampoline.S: secondary processor verfication (16bit) - * arch/x86_64/kernel/acpi/wakeup.S:Verfication at resume (16bit) - * - * verify_cpu, returns the status of cpu check in register %eax. - * 0: Success 1: Failure - * - * The caller needs to check for the error code and take the action - * appropriately. Either display a message or halt. - */ - -#include -#include - -verify_cpu: - pushfl # Save caller passed flags - pushl $0 # Kill any dangerous flags - popfl - - pushfl # standard way to check for cpuid - popl %eax - movl %eax,%ebx - xorl $0x200000,%eax - pushl %eax - popfl - pushfl - popl %eax - cmpl %eax,%ebx - jz verify_cpu_no_longmode # cpu has no cpuid - - movl $0x0,%eax # See if cpuid 1 is implemented - cpuid - cmpl $0x1,%eax - jb verify_cpu_no_longmode # no cpuid 1 - - xor %di,%di - cmpl $0x68747541,%ebx # AuthenticAMD - jnz verify_cpu_noamd - cmpl $0x69746e65,%edx - jnz verify_cpu_noamd - cmpl $0x444d4163,%ecx - jnz verify_cpu_noamd - mov $1,%di # cpu is from AMD - -verify_cpu_noamd: - movl $0x1,%eax # Does the cpu have what it takes - cpuid - andl $REQUIRED_MASK0,%edx - xorl $REQUIRED_MASK0,%edx - jnz verify_cpu_no_longmode - - movl $0x80000000,%eax # See if extended cpuid is implemented - cpuid - cmpl $0x80000001,%eax - jb verify_cpu_no_longmode # no extended cpuid - - movl $0x80000001,%eax # Does the cpu have what it takes - cpuid - andl $REQUIRED_MASK1,%edx - xorl $REQUIRED_MASK1,%edx - jnz verify_cpu_no_longmode - -verify_cpu_sse_test: - movl $1,%eax - cpuid - andl $SSE_MASK,%edx - cmpl $SSE_MASK,%edx - je verify_cpu_sse_ok - test %di,%di - jz verify_cpu_no_longmode # only try to force SSE on AMD - movl $MSR_K7_HWCR,%ecx - rdmsr - btr $15,%eax # enable SSE - wrmsr - xor %di,%di # don't loop - jmp verify_cpu_sse_test # try again - -verify_cpu_no_longmode: - popfl # Restore caller passed flags - movl $1,%eax - ret -verify_cpu_sse_ok: - popfl # Restore caller passed flags - xorl %eax, %eax - ret -- cgit v1.2.2 From ae84739c27b6b3725993202fe02ff35ab86468e1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Nov 2010 10:35:52 -0800 Subject: x86, cpu: Clear XD_DISABLED flag on Intel to regain NX Intel CPUs have an additional MSR bit to indicate if the BIOS was configured to disable the NX cpu feature. This bit was traditionally used for operating systems that did not understand how to handle the NX bit. Since Linux understands this, this BIOS flag should be ignored by default. In a review[1] of reported hardware being used by Ubuntu bug reporters, almost 10% of systems had an incorrectly configured BIOS, leaving their systems unable to use the NX features of their CPU. This change will clear the MSR_IA32_MISC_ENABLE_XD_DISABLE bit so that NX cannot be inappropriately controlled by the BIOS on Intel CPUs. If, under very strange hardware configurations, NX actually needs to be disabled, "noexec=off" can be used to restore the prior behavior. [1] http://www.outflux.net/blog/archives/2010/02/18/data-mining-for-nx-bit/ Signed-off-by: Kees Cook LKML-Reference: <1289414154-7829-3-git-send-email-kees.cook@canonical.com> Acked-by: Pekka Enberg Acked-by: Alan Cox Signed-off-by: H. Peter Anvin --- arch/x86/kernel/verify_cpu.S | 48 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index 56a8c2a867d9..ccb4136da0aa 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -7,6 +7,7 @@ * Copyright (c) 2007 Andi Kleen (ak@suse.de) * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com) * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com) + * Copyright (c) 2010 Kees Cook (kees.cook@canonical.com) * * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. @@ -14,18 +15,16 @@ * This is a common code for verification whether CPU supports * long mode and SSE or not. It is not called directly instead this * file is included at various places and compiled in that context. - * Following are the current usage. + * This file is expected to run in 32bit code. Currently: * - * This file is included by both 16bit and 32bit code. + * arch/x86_64/boot/compressed/head_64.S: Boot cpu verification + * arch/x86_64/kernel/trampoline_64.S: secondary processor verfication * - * arch/x86_64/boot/setup.S : Boot cpu verification (16bit) - * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit) - * arch/x86_64/kernel/trampoline.S: secondary processor verfication (16bit) - * arch/x86_64/kernel/acpi/wakeup.S:Verfication at resume (16bit) - * - * verify_cpu, returns the status of cpu check in register %eax. + * verify_cpu, returns the status of longmode and SSE in register %eax. * 0: Success 1: Failure * + * On Intel, the XD_DISABLE flag will be cleared as a side-effect. + * * The caller needs to check for the error code and take the action * appropriately. Either display a message or halt. */ @@ -62,8 +61,41 @@ verify_cpu: cmpl $0x444d4163,%ecx jnz verify_cpu_noamd mov $1,%di # cpu is from AMD + jmp verify_cpu_check verify_cpu_noamd: + cmpl $0x756e6547,%ebx # GenuineIntel? + jnz verify_cpu_check + cmpl $0x49656e69,%edx + jnz verify_cpu_check + cmpl $0x6c65746e,%ecx + jnz verify_cpu_check + + # only call IA32_MISC_ENABLE when: + # family > 6 || (family == 6 && model >= 0xd) + movl $0x1, %eax # check CPU family and model + cpuid + movl %eax, %ecx + + andl $0x0ff00f00, %eax # mask family and extended family + shrl $8, %eax + cmpl $6, %eax + ja verify_cpu_clear_xd # family > 6, ok + jb verify_cpu_check # family < 6, skip + + andl $0x000f00f0, %ecx # mask model and extended model + shrl $4, %ecx + cmpl $0xd, %ecx + jb verify_cpu_check # family == 6, model < 0xd, skip + +verify_cpu_clear_xd: + movl $MSR_IA32_MISC_ENABLE, %ecx + rdmsr + btrl $2, %edx # clear MSR_IA32_MISC_ENABLE_XD_DISABLE + jnc verify_cpu_check # only write MSR if bit was changed + wrmsr + +verify_cpu_check: movl $0x1,%eax # Does the cpu have what it takes cpuid andl $REQUIRED_MASK0,%edx -- cgit v1.2.2 From ebba638ae723d8a8fc2f7abce5ec18b688b791d7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Nov 2010 10:35:53 -0800 Subject: x86, cpu: Call verify_cpu during 32bit CPU startup The XD_DISABLE-clearing side-effect needs to happen for both 32bit and 64bit, but the 32bit init routines were not calling verify_cpu() yet. This adds that call to gain the side-effect. The longmode/SSE tests being performed in verify_cpu() need to happen very early for 64bit but not for 32bit. Instead of including it in two places for 32bit, we can just include it once in arch/x86/kernel/head_32.S. Signed-off-by: Kees Cook LKML-Reference: <1289414154-7829-4-git-send-email-kees.cook@canonical.com> Acked-by: Pekka Enberg Acked-by: Alan Cox Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head_32.S | 6 ++++++ arch/x86/kernel/verify_cpu.S | 1 + 2 files changed, 7 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index bcece91dd311..fdaea523ac8f 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -314,6 +314,10 @@ ENTRY(startup_32_smp) subl $0x80000001, %eax cmpl $(0x8000ffff-0x80000001), %eax ja 6f + + /* Clear bogus XD_DISABLE bits */ + call verify_cpu + mov $0x80000001, %eax cpuid /* Execute Disable bit supported? */ @@ -609,6 +613,8 @@ ignore_int: #endif iret +#include "verify_cpu.S" + __REFDATA .align 4 ENTRY(initial_code) diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index ccb4136da0aa..5644b4b7ed28 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -19,6 +19,7 @@ * * arch/x86_64/boot/compressed/head_64.S: Boot cpu verification * arch/x86_64/kernel/trampoline_64.S: secondary processor verfication + * arch/x86_64/kernel/head_32.S: processor startup * * verify_cpu, returns the status of longmode and SSE in register %eax. * 0: Success 1: Failure -- cgit v1.2.2 From c751e17b5371ad86cdde6cf5c0175e06f3ff0347 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Nov 2010 12:08:04 -0800 Subject: x86: Add CE4100 platform support Add CE4100 platform support. CE4100 needs early setup like moorestown. Signed-off-by: Thomas Gleixner Signed-off-by: Dirk Brandewie LKML-Reference: <94720fd7f5564a12ebf202cf2c4f4c0d619aab35.1289331834.git.dirk.brandewie@gmail.com> Signed-off-by: Thomas Gleixner --- arch/x86/kernel/head32.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 763310165fa0..7f138b3c3c52 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -61,6 +61,9 @@ void __init i386_start_kernel(void) case X86_SUBARCH_MRST: x86_mrst_early_setup(); break; + case X86_SUBARCH_CE4100: + x86_ce4100_early_setup(); + break; default: i386_default_early_setup(); break; -- cgit v1.2.2 From 37bc9f5078c62bfa73edeb0053edceb3ed5e46a4 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Tue, 9 Nov 2010 12:08:08 -0800 Subject: x86: Ce4100: Add reboot_fixup() for CE4100 This patch adds the CE4100 reboot fixup to reboot_fixups_32.c [ tglx: Moved PCI id to reboot_fixups_32.c ] Signed-off-by: Dirk Brandewie LKML-Reference: <5bdcfb4f0206fa721570504e95659a03b815bc5e.1289331834.git.dirk.brandewie@gmail.com> Signed-off-by: Thomas Gleixner --- arch/x86/kernel/reboot_fixups_32.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index fda313ebbb03..c8e41e90f59c 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c @@ -43,17 +43,33 @@ static void rdc321x_reset(struct pci_dev *dev) outb(1, 0x92); } +static void ce4100_reset(struct pci_dev *dev) +{ + int i; + + for (i = 0; i < 10; i++) { + outb(0x2, 0xcf9); + udelay(50); + } +} + struct device_fixup { unsigned int vendor; unsigned int device; void (*reboot_fixup)(struct pci_dev *); }; +/* + * PCI ids solely used for fixups_table go here + */ +#define PCI_DEVICE_ID_INTEL_CE4100 0x0708 + static const struct device_fixup fixups_table[] = { { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, { PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset }, +{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CE4100, ce4100_reset }, }; /* -- cgit v1.2.2 From 79250af2d5953b69380a6319b493862bf4ece972 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 16 Nov 2010 10:10:04 -0800 Subject: x86: Fix included-by file reference comments Adjust the paths for files that are including verify_cpu.S. Reported-by: Yinghai Lu Signed-off-by: Kees Cook Acked-by: Pekka Enberg Cc: Alan Cox LKML-Reference: <1289931004-16066-1-git-send-email-kees.cook@canonical.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/verify_cpu.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index 5644b4b7ed28..0edefc19a113 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -17,9 +17,9 @@ * file is included at various places and compiled in that context. * This file is expected to run in 32bit code. Currently: * - * arch/x86_64/boot/compressed/head_64.S: Boot cpu verification - * arch/x86_64/kernel/trampoline_64.S: secondary processor verfication - * arch/x86_64/kernel/head_32.S: processor startup + * arch/x86/boot/compressed/head_64.S: Boot cpu verification + * arch/x86/kernel/trampoline_64.S: secondary processor verfication + * arch/x86/kernel/head_32.S: processor startup * * verify_cpu, returns the status of longmode and SSE in register %eax. * 0: Success 1: Failure -- cgit v1.2.2 From 82148d1d0b2f369851f2dff5088f7840f9f16abf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sh=C3=A9rab?= Date: Sat, 25 Sep 2010 06:06:57 +0200 Subject: x86/platform: Add Eurobraille/Iris power off support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Iris machines from Eurobraille do not have APM or ACPI support to shut themselves down properly. A special I/O sequence is needed to do so. This modle runs this I/O sequence at kernel shutdown when its force parameter is set to 1. Signed-off-by: Shérab Acked-by: "H. Peter Anvin" [ did minor coding style edits ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/iris.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 arch/x86/kernel/iris.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9e13763b6092..beac17a0fcab 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -109,6 +109,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o +obj-$(CONFIG_X86_32_IRIS) += iris.o ### # 64 bit specific files diff --git a/arch/x86/kernel/iris.c b/arch/x86/kernel/iris.c new file mode 100644 index 000000000000..1ba7f5ed8c9b --- /dev/null +++ b/arch/x86/kernel/iris.c @@ -0,0 +1,91 @@ +/* + * Eurobraille/Iris power off support. + * + * Eurobraille's Iris machine is a PC with no APM or ACPI support. + * It is shutdown by a special I/O sequence which this module provides. + * + * Copyright (C) Shérab + * + * This program is free software ; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation ; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY ; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the program ; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define IRIS_GIO_BASE 0x340 +#define IRIS_GIO_INPUT IRIS_GIO_BASE +#define IRIS_GIO_OUTPUT (IRIS_GIO_BASE + 1) +#define IRIS_GIO_PULSE 0x80 /* First byte to send */ +#define IRIS_GIO_REST 0x00 /* Second byte to send */ +#define IRIS_GIO_NODEV 0xff /* Likely not an Iris */ + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Sébastien Hinderer "); +MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille"); +MODULE_SUPPORTED_DEVICE("Eurobraille/Iris"); + +static int force; + +module_param(force, bool, 0); +MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation."); + +static void (*old_pm_power_off)(void); + +static void iris_power_off(void) +{ + outb(IRIS_GIO_PULSE, IRIS_GIO_OUTPUT); + msleep(850); + outb(IRIS_GIO_REST, IRIS_GIO_OUTPUT); +} + +/* + * Before installing the power_off handler, try to make sure the OS is + * running on an Iris. Since Iris does not support DMI, this is done + * by reading its input port and seeing whether the read value is + * meaningful. + */ +static int iris_init(void) +{ + unsigned char status; + if (force != 1) { + printk(KERN_ERR "The force parameter has not been set to 1 so the Iris poweroff handler will not be installed.\n"); + return -ENODEV; + } + status = inb(IRIS_GIO_INPUT); + if (status == IRIS_GIO_NODEV) { + printk(KERN_ERR "This machine does not seem to be an Iris. Power_off handler not installed.\n"); + return -ENODEV; + } + old_pm_power_off = pm_power_off; + pm_power_off = &iris_power_off; + printk(KERN_INFO "Iris power_off handler installed.\n"); + + return 0; +} + +static void iris_exit(void) +{ + pm_power_off = old_pm_power_off; + printk(KERN_INFO "Iris power_off handler uninstalled.\n"); +} + +module_init(iris_init); +module_exit(iris_exit); -- cgit v1.2.2 From 5bd5a452662bc37c54fb6828db1a3faf87e6511c Mon Sep 17 00:00:00 2001 From: Matthieu Castet Date: Tue, 16 Nov 2010 22:31:26 +0100 Subject: x86: Add NX protection for kernel data This patch expands functionality of CONFIG_DEBUG_RODATA to set main (static) kernel data area as NX. The following steps are taken to achieve this: 1. Linker script is adjusted so .text always starts and ends on a page bound 2. Linker script is adjusted so .rodata always start and end on a page boundary 3. NX is set for all pages from _etext through _end in mark_rodata_ro. 4. free_init_pages() sets released memory NX in arch/x86/mm/init.c 5. bios rom is set to x when pcibios is used. The results of patch application may be observed in the diff of kernel page table dumps: pcibios: -- data_nx_pt_before.txt 2009-10-13 07:48:59.000000000 -0400 ++ data_nx_pt_after.txt 2009-10-13 07:26:46.000000000 -0400 0x00000000-0xc0000000 3G pmd ---[ Kernel Mapping ]--- -0xc0000000-0xc0100000 1M RW GLB x pte +0xc0000000-0xc00a0000 640K RW GLB NX pte +0xc00a0000-0xc0100000 384K RW GLB x pte -0xc0100000-0xc03d7000 2908K ro GLB x pte +0xc0100000-0xc0318000 2144K ro GLB x pte +0xc0318000-0xc03d7000 764K ro GLB NX pte -0xc03d7000-0xc0600000 2212K RW GLB x pte +0xc03d7000-0xc0600000 2212K RW GLB NX pte 0xc0600000-0xf7a00000 884M RW PSE GLB NX pmd 0xf7a00000-0xf7bfe000 2040K RW GLB NX pte 0xf7bfe000-0xf7c00000 8K pte No pcibios: -- data_nx_pt_before.txt 2009-10-13 07:48:59.000000000 -0400 ++ data_nx_pt_after.txt 2009-10-13 07:26:46.000000000 -0400 0x00000000-0xc0000000 3G pmd ---[ Kernel Mapping ]--- -0xc0000000-0xc0100000 1M RW GLB x pte +0xc0000000-0xc0100000 1M RW GLB NX pte -0xc0100000-0xc03d7000 2908K ro GLB x pte +0xc0100000-0xc0318000 2144K ro GLB x pte +0xc0318000-0xc03d7000 764K ro GLB NX pte -0xc03d7000-0xc0600000 2212K RW GLB x pte +0xc03d7000-0xc0600000 2212K RW GLB NX pte 0xc0600000-0xf7a00000 884M RW PSE GLB NX pmd 0xf7a00000-0xf7bfe000 2040K RW GLB NX pte 0xf7bfe000-0xf7c00000 8K pte The patch has been originally developed for Linux 2.6.34-rc2 x86 by Siarhei Liakh and Xuxian Jiang . -v1: initial patch for 2.6.30 -v2: patch for 2.6.31-rc7 -v3: moved all code into arch/x86, adjusted credits -v4: fixed ifdef, removed credits from CREDITS -v5: fixed an address calculation bug in mark_nxdata_nx() -v6: added acked-by and PT dump diff to commit log -v7: minor adjustments for -tip -v8: rework with the merge of "Set first MB as RW+NX" Signed-off-by: Siarhei Liakh Signed-off-by: Xuxian Jiang Signed-off-by: Matthieu CASTET Cc: Arjan van de Ven Cc: James Morris Cc: Andi Kleen Cc: Rusty Russell Cc: Stephen Rothwell Cc: Dave Jones Cc: Kees Cook Cc: Linus Torvalds LKML-Reference: <4CE2F82E.60601@free.fr> [ minor cleanliness edits ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux.lds.S | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index e03530aebfd0..bf4700755184 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -69,7 +69,7 @@ jiffies_64 = jiffies; PHDRS { text PT_LOAD FLAGS(5); /* R_E */ - data PT_LOAD FLAGS(7); /* RWE */ + data PT_LOAD FLAGS(6); /* RW_ */ #ifdef CONFIG_X86_64 user PT_LOAD FLAGS(5); /* R_E */ #ifdef CONFIG_SMP @@ -116,6 +116,10 @@ SECTIONS EXCEPTION_TABLE(16) :text = 0x9090 +#if defined(CONFIG_DEBUG_RODATA) + /* .text should occupy whole number of pages */ + . = ALIGN(PAGE_SIZE); +#endif X64_ALIGN_DEBUG_RODATA_BEGIN RO_DATA(PAGE_SIZE) X64_ALIGN_DEBUG_RODATA_END @@ -335,7 +339,7 @@ SECTIONS __bss_start = .; *(.bss..page_aligned) *(.bss) - . = ALIGN(4); + . = ALIGN(PAGE_SIZE); __bss_stop = .; } -- cgit v1.2.2 From 84e1c6bb38eb318e456558b610396d9f1afaabf0 Mon Sep 17 00:00:00 2001 From: matthieu castet Date: Tue, 16 Nov 2010 22:35:16 +0100 Subject: x86: Add RO/NX protection for loadable kernel modules This patch is a logical extension of the protection provided by CONFIG_DEBUG_RODATA to LKMs. The protection is provided by splitting module_core and module_init into three logical parts each and setting appropriate page access permissions for each individual section: 1. Code: RO+X 2. RO data: RO+NX 3. RW data: RW+NX In order to achieve proper protection, layout_sections() have been modified to align each of the three parts mentioned above onto page boundary. Next, the corresponding page access permissions are set right before successful exit from load_module(). Further, free_module() and sys_init_module have been modified to set module_core and module_init as RW+NX right before calling module_free(). By default, the original section layout and access flags are preserved. When compiled with CONFIG_DEBUG_SET_MODULE_RONX=y, the patch will page-align each group of sections to ensure that each page contains only one type of content and will enforce RO/NX for each group of pages. -v1: Initial proof-of-concept patch. -v2: The patch have been re-written to reduce the number of #ifdefs and to make it architecture-agnostic. Code formatting has also been corrected. -v3: Opportunistic RO/NX protection is now unconditional. Section page-alignment is enabled when CONFIG_DEBUG_RODATA=y. -v4: Removed most macros and improved coding style. -v5: Changed page-alignment and RO/NX section size calculation -v6: Fixed comments. Restricted RO/NX enforcement to x86 only -v7: Introduced CONFIG_DEBUG_SET_MODULE_RONX, added calls to set_all_modules_text_rw() and set_all_modules_text_ro() in ftrace -v8: updated for compatibility with linux 2.6.33-rc5 -v9: coding style fixes -v10: more coding style fixes -v11: minor adjustments for -tip -v12: minor adjustments for v2.6.35-rc2-tip -v13: minor adjustments for v2.6.37-rc1-tip Signed-off-by: Siarhei Liakh Signed-off-by: Xuxian Jiang Acked-by: Arjan van de Ven Reviewed-by: James Morris Signed-off-by: H. Peter Anvin Cc: Andi Kleen Cc: Rusty Russell Cc: Stephen Rothwell Cc: Dave Jones Cc: Kees Cook Cc: Linus Torvalds LKML-Reference: <4CE2F914.9070106@free.fr> [ minor cleanliness edits, -v14: build failure fix ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 3afb33f14d2d..298448656b60 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -49,6 +50,7 @@ static DEFINE_PER_CPU(int, save_modifying_code); int ftrace_arch_code_modify_prepare(void) { set_kernel_text_rw(); + set_all_modules_text_rw(); modifying_code = 1; return 0; } @@ -56,6 +58,7 @@ int ftrace_arch_code_modify_prepare(void) int ftrace_arch_code_modify_post_process(void) { modifying_code = 0; + set_all_modules_text_ro(); set_kernel_text_ro(); return 0; } -- cgit v1.2.2 From eec1d4fa00c6552ae2fdf71d59f1eded7c88dd89 Mon Sep 17 00:00:00 2001 From: Hans Rosenfeld Date: Fri, 29 Oct 2010 17:14:30 +0200 Subject: x86, amd-nb: Complete the rename of AMD NB and related code Not only the naming of the files was confusing, it was even more so for the function and variable names. Renamed the K8 NB and NUMA stuff that is also used on other AMD platforms. This also renames the CONFIG_K8_NUMA option to CONFIG_AMD_NUMA and the related file k8topology_64.c to amdtopology_64.c. No functional changes intended. Signed-off-by: Hans Rosenfeld Signed-off-by: Borislav Petkov --- arch/x86/kernel/amd_nb.c | 72 +++++++++++++++++------------------ arch/x86/kernel/aperture_64.c | 10 ++--- arch/x86/kernel/cpu/intel_cacheinfo.c | 6 +-- arch/x86/kernel/pci-gart_64.c | 34 ++++++++--------- arch/x86/kernel/setup.c | 8 ++-- 5 files changed, 65 insertions(+), 65 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 8f6463d8ed0d..c46df406a2a9 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -12,95 +12,95 @@ static u32 *flush_words; -struct pci_device_id k8_nb_ids[] = { +struct pci_device_id amd_nb_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) }, {} }; -EXPORT_SYMBOL(k8_nb_ids); +EXPORT_SYMBOL(amd_nb_ids); -struct k8_northbridge_info k8_northbridges; -EXPORT_SYMBOL(k8_northbridges); +struct amd_northbridge_info amd_northbridges; +EXPORT_SYMBOL(amd_northbridges); -static struct pci_dev *next_k8_northbridge(struct pci_dev *dev) +static struct pci_dev *next_amd_northbridge(struct pci_dev *dev) { do { dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); if (!dev) break; - } while (!pci_match_id(&k8_nb_ids[0], dev)); + } while (!pci_match_id(&amd_nb_ids[0], dev)); return dev; } -int cache_k8_northbridges(void) +int cache_amd_northbridges(void) { int i; struct pci_dev *dev; - if (k8_northbridges.num) + if (amd_northbridges.num) return 0; dev = NULL; - while ((dev = next_k8_northbridge(dev)) != NULL) - k8_northbridges.num++; + while ((dev = next_amd_northbridge(dev)) != NULL) + amd_northbridges.num++; /* some CPU families (e.g. family 0x11) do not support GART */ if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x15) - k8_northbridges.gart_supported = 1; + amd_northbridges.gart_supported = 1; - k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) * + amd_northbridges.nb_misc = kmalloc((amd_northbridges.num + 1) * sizeof(void *), GFP_KERNEL); - if (!k8_northbridges.nb_misc) + if (!amd_northbridges.nb_misc) return -ENOMEM; - if (!k8_northbridges.num) { - k8_northbridges.nb_misc[0] = NULL; + if (!amd_northbridges.num) { + amd_northbridges.nb_misc[0] = NULL; return 0; } - if (k8_northbridges.gart_supported) { - flush_words = kmalloc(k8_northbridges.num * sizeof(u32), + if (amd_northbridges.gart_supported) { + flush_words = kmalloc(amd_northbridges.num * sizeof(u32), GFP_KERNEL); if (!flush_words) { - kfree(k8_northbridges.nb_misc); + kfree(amd_northbridges.nb_misc); return -ENOMEM; } } dev = NULL; i = 0; - while ((dev = next_k8_northbridge(dev)) != NULL) { - k8_northbridges.nb_misc[i] = dev; - if (k8_northbridges.gart_supported) + while ((dev = next_amd_northbridge(dev)) != NULL) { + amd_northbridges.nb_misc[i] = dev; + if (amd_northbridges.gart_supported) pci_read_config_dword(dev, 0x9c, &flush_words[i++]); } - k8_northbridges.nb_misc[i] = NULL; + amd_northbridges.nb_misc[i] = NULL; return 0; } -EXPORT_SYMBOL_GPL(cache_k8_northbridges); +EXPORT_SYMBOL_GPL(cache_amd_northbridges); /* Ignores subdevice/subvendor but as far as I can figure out they're useless anyways */ -int __init early_is_k8_nb(u32 device) +int __init early_is_amd_nb(u32 device) { struct pci_device_id *id; u32 vendor = device & 0xffff; device >>= 16; - for (id = k8_nb_ids; id->vendor; id++) + for (id = amd_nb_ids; id->vendor; id++) if (vendor == id->vendor && device == id->device) return 1; return 0; } -void k8_flush_garts(void) +void amd_flush_garts(void) { int flushed, i; unsigned long flags; static DEFINE_SPINLOCK(gart_lock); - if (!k8_northbridges.gart_supported) + if (!amd_northbridges.gart_supported) return; /* Avoid races between AGP and IOMMU. In theory it's not needed @@ -109,16 +109,16 @@ void k8_flush_garts(void) that it doesn't matter to serialize more. -AK */ spin_lock_irqsave(&gart_lock, flags); flushed = 0; - for (i = 0; i < k8_northbridges.num; i++) { - pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c, + for (i = 0; i < amd_northbridges.num; i++) { + pci_write_config_dword(amd_northbridges.nb_misc[i], 0x9c, flush_words[i]|1); flushed++; } - for (i = 0; i < k8_northbridges.num; i++) { + for (i = 0; i < amd_northbridges.num; i++) { u32 w; /* Make sure the hardware actually executed the flush*/ for (;;) { - pci_read_config_dword(k8_northbridges.nb_misc[i], + pci_read_config_dword(amd_northbridges.nb_misc[i], 0x9c, &w); if (!(w & 1)) break; @@ -129,19 +129,19 @@ void k8_flush_garts(void) if (!flushed) printk("nothing to flush?\n"); } -EXPORT_SYMBOL_GPL(k8_flush_garts); +EXPORT_SYMBOL_GPL(amd_flush_garts); -static __init int init_k8_nbs(void) +static __init int init_amd_nbs(void) { int err = 0; - err = cache_k8_northbridges(); + err = cache_amd_northbridges(); if (err < 0) - printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n"); + printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n"); return err; } /* This has to go after the PCI subsystem */ -fs_initcall(init_k8_nbs); +fs_initcall(init_amd_nbs); diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index b3a16e8f0703..dcd7c83e1659 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -206,7 +206,7 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order) * Do an PCI bus scan by hand because we're running before the PCI * subsystem. * - * All K8 AGP bridges are AGPv3 compliant, so we can do this scan + * All AMD AGP bridges are AGPv3 compliant, so we can do this scan * generically. It's probably overkill to always scan all slots because * the AGP bridges should be always an own bus on the HT hierarchy, * but do it here for future safety. @@ -303,7 +303,7 @@ void __init early_gart_iommu_check(void) dev_limit = bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { - if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) + if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) continue; ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); @@ -358,7 +358,7 @@ void __init early_gart_iommu_check(void) dev_limit = bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { - if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) + if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) continue; ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); @@ -400,7 +400,7 @@ int __init gart_iommu_hole_init(void) dev_limit = bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { - if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) + if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) continue; iommu_detected = 1; @@ -518,7 +518,7 @@ out: dev_base = bus_dev_ranges[i].dev_base; dev_limit = bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { - if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) + if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) continue; write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 17ad03366211..92512ed380e7 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -333,7 +333,7 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node) { struct amd_l3_cache *l3; - struct pci_dev *dev = node_to_k8_nb_misc(node); + struct pci_dev *dev = node_to_amd_nb_misc(node); l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC); if (!l3) { @@ -370,7 +370,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, return; /* not in virtualized environments */ - if (k8_northbridges.num == 0) + if (amd_northbridges.num == 0) return; /* @@ -378,7 +378,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, * never freed but this is done only on shutdown so it doesn't matter. */ if (!l3_caches) { - int size = k8_northbridges.num * sizeof(struct amd_l3_cache *); + int size = amd_northbridges.num * sizeof(struct amd_l3_cache *); l3_caches = kzalloc(size, GFP_ATOMIC); if (!l3_caches) diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index ba0f0ca9f280..63317c5694d7 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -143,7 +143,7 @@ static void flush_gart(void) spin_lock_irqsave(&iommu_bitmap_lock, flags); if (need_flush) { - k8_flush_garts(); + amd_flush_garts(); need_flush = false; } spin_unlock_irqrestore(&iommu_bitmap_lock, flags); @@ -561,17 +561,17 @@ static void enable_gart_translations(void) { int i; - if (!k8_northbridges.gart_supported) + if (!amd_northbridges.gart_supported) return; - for (i = 0; i < k8_northbridges.num; i++) { - struct pci_dev *dev = k8_northbridges.nb_misc[i]; + for (i = 0; i < amd_northbridges.num; i++) { + struct pci_dev *dev = amd_northbridges.nb_misc[i]; enable_gart_translation(dev, __pa(agp_gatt_table)); } /* Flush the GART-TLB to remove stale entries */ - k8_flush_garts(); + amd_flush_garts(); } /* @@ -596,13 +596,13 @@ static void gart_fixup_northbridges(struct sys_device *dev) if (!fix_up_north_bridges) return; - if (!k8_northbridges.gart_supported) + if (!amd_northbridges.gart_supported) return; pr_info("PCI-DMA: Restoring GART aperture settings\n"); - for (i = 0; i < k8_northbridges.num; i++) { - struct pci_dev *dev = k8_northbridges.nb_misc[i]; + for (i = 0; i < amd_northbridges.num; i++) { + struct pci_dev *dev = amd_northbridges.nb_misc[i]; /* * Don't enable translations just yet. That is the next @@ -644,7 +644,7 @@ static struct sys_device device_gart = { * Private Northbridge GATT initialization in case we cannot use the * AGP driver for some reason. */ -static __init int init_k8_gatt(struct agp_kern_info *info) +static __init int init_amd_gatt(struct agp_kern_info *info) { unsigned aper_size, gatt_size, new_aper_size; unsigned aper_base, new_aper_base; @@ -656,8 +656,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) aper_size = aper_base = info->aper_size = 0; dev = NULL; - for (i = 0; i < k8_northbridges.num; i++) { - dev = k8_northbridges.nb_misc[i]; + for (i = 0; i < amd_northbridges.num; i++) { + dev = amd_northbridges.nb_misc[i]; new_aper_base = read_aperture(dev, &new_aper_size); if (!new_aper_base) goto nommu; @@ -725,13 +725,13 @@ static void gart_iommu_shutdown(void) if (!no_agp) return; - if (!k8_northbridges.gart_supported) + if (!amd_northbridges.gart_supported) return; - for (i = 0; i < k8_northbridges.num; i++) { + for (i = 0; i < amd_northbridges.num; i++) { u32 ctl; - dev = k8_northbridges.nb_misc[i]; + dev = amd_northbridges.nb_misc[i]; pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); ctl &= ~GARTEN; @@ -749,14 +749,14 @@ int __init gart_iommu_init(void) unsigned long scratch; long i; - if (!k8_northbridges.gart_supported) + if (!amd_northbridges.gart_supported) return 0; #ifndef CONFIG_AGP_AMD64 no_agp = 1; #else /* Makefile puts PCI initialization via subsys_initcall first. */ - /* Add other K8 AGP bridge drivers here */ + /* Add other AMD AGP bridge drivers here */ no_agp = no_agp || (agp_amd64_init() < 0) || (agp_copy_info(agp_bridge, &info) < 0); @@ -765,7 +765,7 @@ int __init gart_iommu_init(void) if (no_iommu || (!force_iommu && max_pfn <= MAX_DMA32_PFN) || !gart_iommu_aperture || - (no_agp && init_k8_gatt(&info) < 0)) { + (no_agp && init_amd_gatt(&info) < 0)) { if (max_pfn > MAX_DMA32_PFN) { pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); pr_warning("falling back to iommu=soft.\n"); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 21c6746338af..df172c1e8238 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -694,7 +694,7 @@ static u64 __init get_max_mapped(void) void __init setup_arch(char **cmdline_p) { int acpi = 0; - int k8 = 0; + int amd = 0; unsigned long flags; #ifdef CONFIG_X86_32 @@ -981,12 +981,12 @@ void __init setup_arch(char **cmdline_p) acpi = acpi_numa_init(); #endif -#ifdef CONFIG_K8_NUMA +#ifdef CONFIG_AMD_NUMA if (!acpi) - k8 = !k8_numa_init(0, max_pfn); + amd = !amd_numa_init(0, max_pfn); #endif - initmem_init(0, max_pfn, acpi, k8); + initmem_init(0, max_pfn, acpi, amd); memblock_find_dma_reserve(); dma32_reserve_bootmem(); -- cgit v1.2.2 From 9653a5c76c8677b05b45b3b999d3b39988d2a064 Mon Sep 17 00:00:00 2001 From: Hans Rosenfeld Date: Fri, 29 Oct 2010 17:14:31 +0200 Subject: x86, amd-nb: Cleanup AMD northbridge caching code Support more than just the "Misc Control" part of the northbridges. Support more flags by turning "gart_supported" into a single bit flag that is stored in a flags member. Clean up related code by using a set of functions (amd_nb_num(), amd_nb_has_feature() and node_to_amd_nb()) instead of accessing the NB data structures directly. Reorder the initialization code and put the GART flush words caching in a separate function. Signed-off-by: Hans Rosenfeld Signed-off-by: Borislav Petkov --- arch/x86/kernel/amd_nb.c | 109 +++++++++++++++++++--------------- arch/x86/kernel/cpu/intel_cacheinfo.c | 6 +- arch/x86/kernel/pci-gart_64.c | 24 ++++---- 3 files changed, 77 insertions(+), 62 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index c46df406a2a9..63c8b4f2c1ad 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -12,74 +12,65 @@ static u32 *flush_words; -struct pci_device_id amd_nb_ids[] = { +struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) }, {} }; -EXPORT_SYMBOL(amd_nb_ids); +EXPORT_SYMBOL(amd_nb_misc_ids); struct amd_northbridge_info amd_northbridges; EXPORT_SYMBOL(amd_northbridges); -static struct pci_dev *next_amd_northbridge(struct pci_dev *dev) +static struct pci_dev *next_northbridge(struct pci_dev *dev, + struct pci_device_id *ids) { do { dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); if (!dev) break; - } while (!pci_match_id(&amd_nb_ids[0], dev)); + } while (!pci_match_id(ids, dev)); return dev; } -int cache_amd_northbridges(void) +int amd_cache_northbridges(void) { - int i; - struct pci_dev *dev; + int i = 0; + struct amd_northbridge *nb; + struct pci_dev *misc; - if (amd_northbridges.num) + if (amd_nb_num()) return 0; - dev = NULL; - while ((dev = next_amd_northbridge(dev)) != NULL) - amd_northbridges.num++; + misc = NULL; + while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL) + i++; - /* some CPU families (e.g. family 0x11) do not support GART */ - if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || - boot_cpu_data.x86 == 0x15) - amd_northbridges.gart_supported = 1; + if (i == 0) + return 0; - amd_northbridges.nb_misc = kmalloc((amd_northbridges.num + 1) * - sizeof(void *), GFP_KERNEL); - if (!amd_northbridges.nb_misc) + nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL); + if (!nb) return -ENOMEM; - if (!amd_northbridges.num) { - amd_northbridges.nb_misc[0] = NULL; - return 0; - } + amd_northbridges.nb = nb; + amd_northbridges.num = i; - if (amd_northbridges.gart_supported) { - flush_words = kmalloc(amd_northbridges.num * sizeof(u32), - GFP_KERNEL); - if (!flush_words) { - kfree(amd_northbridges.nb_misc); - return -ENOMEM; - } - } + misc = NULL; + for (i = 0; i != amd_nb_num(); i++) { + node_to_amd_nb(i)->misc = misc = + next_northbridge(misc, amd_nb_misc_ids); + } + + /* some CPU families (e.g. family 0x11) do not support GART */ + if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || + boot_cpu_data.x86 == 0x15) + amd_northbridges.flags |= AMD_NB_GART; - dev = NULL; - i = 0; - while ((dev = next_amd_northbridge(dev)) != NULL) { - amd_northbridges.nb_misc[i] = dev; - if (amd_northbridges.gart_supported) - pci_read_config_dword(dev, 0x9c, &flush_words[i++]); - } - amd_northbridges.nb_misc[i] = NULL; return 0; } -EXPORT_SYMBOL_GPL(cache_amd_northbridges); +EXPORT_SYMBOL_GPL(amd_cache_northbridges); /* Ignores subdevice/subvendor but as far as I can figure out they're useless anyways */ @@ -88,19 +79,39 @@ int __init early_is_amd_nb(u32 device) struct pci_device_id *id; u32 vendor = device & 0xffff; device >>= 16; - for (id = amd_nb_ids; id->vendor; id++) + for (id = amd_nb_misc_ids; id->vendor; id++) if (vendor == id->vendor && device == id->device) return 1; return 0; } +int amd_cache_gart(void) +{ + int i; + + if (!amd_nb_has_feature(AMD_NB_GART)) + return 0; + + flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL); + if (!flush_words) { + amd_northbridges.flags &= ~AMD_NB_GART; + return -ENOMEM; + } + + for (i = 0; i != amd_nb_num(); i++) + pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, + &flush_words[i]); + + return 0; +} + void amd_flush_garts(void) { int flushed, i; unsigned long flags; static DEFINE_SPINLOCK(gart_lock); - if (!amd_northbridges.gart_supported) + if (!amd_nb_has_feature(AMD_NB_GART)) return; /* Avoid races between AGP and IOMMU. In theory it's not needed @@ -109,16 +120,16 @@ void amd_flush_garts(void) that it doesn't matter to serialize more. -AK */ spin_lock_irqsave(&gart_lock, flags); flushed = 0; - for (i = 0; i < amd_northbridges.num; i++) { - pci_write_config_dword(amd_northbridges.nb_misc[i], 0x9c, - flush_words[i]|1); + for (i = 0; i < amd_nb_num(); i++) { + pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c, + flush_words[i] | 1); flushed++; } - for (i = 0; i < amd_northbridges.num; i++) { + for (i = 0; i < amd_nb_num(); i++) { u32 w; /* Make sure the hardware actually executed the flush*/ for (;;) { - pci_read_config_dword(amd_northbridges.nb_misc[i], + pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, &w); if (!(w & 1)) break; @@ -135,11 +146,15 @@ static __init int init_amd_nbs(void) { int err = 0; - err = cache_amd_northbridges(); + err = amd_cache_northbridges(); if (err < 0) printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n"); + if (amd_cache_gart() < 0) + printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, " + "GART support disabled.\n"); + return err; } diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 92512ed380e7..6b8ea7434972 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -333,7 +333,7 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node) { struct amd_l3_cache *l3; - struct pci_dev *dev = node_to_amd_nb_misc(node); + struct pci_dev *dev = node_to_amd_nb(node)->misc; l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC); if (!l3) { @@ -370,7 +370,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, return; /* not in virtualized environments */ - if (amd_northbridges.num == 0) + if (amd_nb_num() == 0) return; /* @@ -378,7 +378,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, * never freed but this is done only on shutdown so it doesn't matter. */ if (!l3_caches) { - int size = amd_northbridges.num * sizeof(struct amd_l3_cache *); + int size = amd_nb_num() * sizeof(struct amd_l3_cache *); l3_caches = kzalloc(size, GFP_ATOMIC); if (!l3_caches) diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 63317c5694d7..c01ffa5b9b87 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -561,11 +561,11 @@ static void enable_gart_translations(void) { int i; - if (!amd_northbridges.gart_supported) + if (!amd_nb_has_feature(AMD_NB_GART)) return; - for (i = 0; i < amd_northbridges.num; i++) { - struct pci_dev *dev = amd_northbridges.nb_misc[i]; + for (i = 0; i < amd_nb_num(); i++) { + struct pci_dev *dev = node_to_amd_nb(i)->misc; enable_gart_translation(dev, __pa(agp_gatt_table)); } @@ -596,13 +596,13 @@ static void gart_fixup_northbridges(struct sys_device *dev) if (!fix_up_north_bridges) return; - if (!amd_northbridges.gart_supported) + if (!amd_nb_has_feature(AMD_NB_GART)) return; pr_info("PCI-DMA: Restoring GART aperture settings\n"); - for (i = 0; i < amd_northbridges.num; i++) { - struct pci_dev *dev = amd_northbridges.nb_misc[i]; + for (i = 0; i < amd_nb_num(); i++) { + struct pci_dev *dev = node_to_amd_nb(i)->misc; /* * Don't enable translations just yet. That is the next @@ -656,8 +656,8 @@ static __init int init_amd_gatt(struct agp_kern_info *info) aper_size = aper_base = info->aper_size = 0; dev = NULL; - for (i = 0; i < amd_northbridges.num; i++) { - dev = amd_northbridges.nb_misc[i]; + for (i = 0; i < amd_nb_num(); i++) { + dev = node_to_amd_nb(i)->misc; new_aper_base = read_aperture(dev, &new_aper_size); if (!new_aper_base) goto nommu; @@ -725,13 +725,13 @@ static void gart_iommu_shutdown(void) if (!no_agp) return; - if (!amd_northbridges.gart_supported) + if (!amd_nb_has_feature(AMD_NB_GART)) return; - for (i = 0; i < amd_northbridges.num; i++) { + for (i = 0; i < amd_nb_num(); i++) { u32 ctl; - dev = amd_northbridges.nb_misc[i]; + dev = node_to_amd_nb(i)->misc; pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); ctl &= ~GARTEN; @@ -749,7 +749,7 @@ int __init gart_iommu_init(void) unsigned long scratch; long i; - if (!amd_northbridges.gart_supported) + if (!amd_nb_has_feature(AMD_NB_GART)) return 0; #ifndef CONFIG_AGP_AMD64 -- cgit v1.2.2 From f658bcfb2607bf0808966a69cf74135ce98e5c2d Mon Sep 17 00:00:00 2001 From: Hans Rosenfeld Date: Fri, 29 Oct 2010 17:14:32 +0200 Subject: x86, cacheinfo: Cleanup L3 cache index disable support Adaptions to the changes of the AMD northbridge caching code: instead of a bool in each l3 struct, use a flag in amd_northbridges.flags to indicate L3 cache index disable support; use a pointer to the whole northbridge instead of the misc device in the l3 struct; simplify the initialisation; dynamically generate sysfs attribute array. Signed-off-by: Hans Rosenfeld Signed-off-by: Borislav Petkov --- arch/x86/kernel/amd_nb.c | 10 +++ arch/x86/kernel/cpu/intel_cacheinfo.c | 147 +++++++++++++++------------------- 2 files changed, 73 insertions(+), 84 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 63c8b4f2c1ad..affacb5e0065 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -68,6 +68,16 @@ int amd_cache_northbridges(void) boot_cpu_data.x86 == 0x15) amd_northbridges.flags |= AMD_NB_GART; + /* + * Some CPU families support L3 Cache Index Disable. There are some + * limitations because of E382 and E388 on family 0x10. + */ + if (boot_cpu_data.x86 == 0x10 && + boot_cpu_data.x86_model >= 0x8 && + (boot_cpu_data.x86_model > 0x9 || + boot_cpu_data.x86_mask >= 0x1)) + amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE; + return 0; } EXPORT_SYMBOL_GPL(amd_cache_northbridges); diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 6b8ea7434972..9ecf81f9b90f 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -149,8 +149,7 @@ union _cpuid4_leaf_ecx { }; struct amd_l3_cache { - struct pci_dev *dev; - bool can_disable; + struct amd_northbridge *nb; unsigned indices; u8 subcaches[4]; }; @@ -311,14 +310,12 @@ struct _cache_attr { /* * L3 cache descriptors */ -static struct amd_l3_cache **__cpuinitdata l3_caches; - static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) { unsigned int sc0, sc1, sc2, sc3; u32 val = 0; - pci_read_config_dword(l3->dev, 0x1C4, &val); + pci_read_config_dword(l3->nb->misc, 0x1C4, &val); /* calculate subcache sizes */ l3->subcaches[0] = sc0 = !(val & BIT(0)); @@ -330,47 +327,14 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; } -static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node) -{ - struct amd_l3_cache *l3; - struct pci_dev *dev = node_to_amd_nb(node)->misc; - - l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC); - if (!l3) { - printk(KERN_WARNING "Error allocating L3 struct\n"); - return NULL; - } - - l3->dev = dev; - - amd_calc_l3_indices(l3); - - return l3; -} - -static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, - int index) +static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, + int index) { + static struct amd_l3_cache *__cpuinitdata l3_caches; int node; - if (boot_cpu_data.x86 != 0x10) - return; - - if (index < 3) - return; - - /* see errata #382 and #388 */ - if (boot_cpu_data.x86_model < 0x8) - return; - - if ((boot_cpu_data.x86_model == 0x8 || - boot_cpu_data.x86_model == 0x9) - && - boot_cpu_data.x86_mask < 0x1) - return; - - /* not in virtualized environments */ - if (amd_nb_num() == 0) + /* only for L3, and not in virtualized environments */ + if (index < 3 || amd_nb_num() == 0) return; /* @@ -378,7 +342,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, * never freed but this is done only on shutdown so it doesn't matter. */ if (!l3_caches) { - int size = amd_nb_num() * sizeof(struct amd_l3_cache *); + int size = amd_nb_num() * sizeof(struct amd_l3_cache); l3_caches = kzalloc(size, GFP_ATOMIC); if (!l3_caches) @@ -387,14 +351,12 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, node = amd_get_nb_id(smp_processor_id()); - if (!l3_caches[node]) { - l3_caches[node] = amd_init_l3_cache(node); - l3_caches[node]->can_disable = true; + if (!l3_caches[node].nb) { + l3_caches[node].nb = node_to_amd_nb(node); + amd_calc_l3_indices(&l3_caches[node]); } - WARN_ON(!l3_caches[node]); - - this_leaf->l3 = l3_caches[node]; + this_leaf->l3 = &l3_caches[node]; } /* @@ -408,7 +370,7 @@ int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot) { unsigned int reg = 0; - pci_read_config_dword(l3->dev, 0x1BC + slot * 4, ®); + pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, ®); /* check whether this slot is activated already */ if (reg & (3UL << 30)) @@ -422,7 +384,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, { int index; - if (!this_leaf->l3 || !this_leaf->l3->can_disable) + if (!this_leaf->l3 || + !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) return -EINVAL; index = amd_get_l3_disable_slot(this_leaf->l3, slot); @@ -457,7 +420,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu, if (!l3->subcaches[i]) continue; - pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); + pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg); /* * We need to WBINVD on a core on the node containing the L3 @@ -467,7 +430,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu, wbinvd_on_cpu(cpu); reg |= BIT(31); - pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); + pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg); } } @@ -524,7 +487,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!this_leaf->l3 || !this_leaf->l3->can_disable) + if (!this_leaf->l3 || + !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) return -EINVAL; cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); @@ -545,7 +509,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, #define STORE_CACHE_DISABLE(slot) \ static ssize_t \ store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ - const char *buf, size_t count) \ + const char *buf, size_t count) \ { \ return store_cache_disable(this_leaf, buf, count, slot); \ } @@ -558,10 +522,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, show_cache_disable_1, store_cache_disable_1); #else /* CONFIG_AMD_NB */ -static void __cpuinit -amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index) -{ -}; +#define amd_init_l3_cache(x, y) #endif /* CONFIG_AMD_NB */ static int @@ -575,7 +536,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index, if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { amd_cpuid4(index, &eax, &ebx, &ecx); - amd_check_l3_disable(this_leaf, index); + amd_init_l3_cache(this_leaf, index); } else { cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); } @@ -983,30 +944,48 @@ define_one_ro(size); define_one_ro(shared_cpu_map); define_one_ro(shared_cpu_list); -#define DEFAULT_SYSFS_CACHE_ATTRS \ - &type.attr, \ - &level.attr, \ - &coherency_line_size.attr, \ - &physical_line_partition.attr, \ - &ways_of_associativity.attr, \ - &number_of_sets.attr, \ - &size.attr, \ - &shared_cpu_map.attr, \ - &shared_cpu_list.attr - static struct attribute *default_attrs[] = { - DEFAULT_SYSFS_CACHE_ATTRS, + &type.attr, + &level.attr, + &coherency_line_size.attr, + &physical_line_partition.attr, + &ways_of_associativity.attr, + &number_of_sets.attr, + &size.attr, + &shared_cpu_map.attr, + &shared_cpu_list.attr, NULL }; -static struct attribute *default_l3_attrs[] = { - DEFAULT_SYSFS_CACHE_ATTRS, #ifdef CONFIG_AMD_NB - &cache_disable_0.attr, - &cache_disable_1.attr, +static struct attribute ** __cpuinit amd_l3_attrs(void) +{ + static struct attribute **attrs; + int n; + + if (attrs) + return attrs; + + n = sizeof (default_attrs) / sizeof (struct attribute *); + + if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) + n += 2; + + attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL); + if (attrs == NULL) + return attrs = default_attrs; + + for (n = 0; default_attrs[n]; n++) + attrs[n] = default_attrs[n]; + + if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) { + attrs[n++] = &cache_disable_0.attr; + attrs[n++] = &cache_disable_1.attr; + } + + return attrs; +} #endif - NULL -}; static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -1117,11 +1096,11 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) this_leaf = CPUID4_INFO_IDX(cpu, i); - if (this_leaf->l3 && this_leaf->l3->can_disable) - ktype_cache.default_attrs = default_l3_attrs; - else - ktype_cache.default_attrs = default_attrs; - + ktype_cache.default_attrs = default_attrs; +#ifdef CONFIG_AMD_NB + if (this_leaf->l3) + ktype_cache.default_attrs = amd_l3_attrs(); +#endif retval = kobject_init_and_add(&(this_object->kobj), &ktype_cache, per_cpu(ici_cache_kobject, cpu), -- cgit v1.2.2 From 9cdca869724e766eb48c061967cb777ddb436c76 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 20 Nov 2010 10:37:05 +0100 Subject: x86: platform: Move iris to x86/platform where it belongs Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 1 - arch/x86/kernel/iris.c | 91 ------------------------------------------------ 2 files changed, 92 deletions(-) delete mode 100644 arch/x86/kernel/iris.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index beac17a0fcab..9e13763b6092 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -109,7 +109,6 @@ obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o -obj-$(CONFIG_X86_32_IRIS) += iris.o ### # 64 bit specific files diff --git a/arch/x86/kernel/iris.c b/arch/x86/kernel/iris.c deleted file mode 100644 index 1ba7f5ed8c9b..000000000000 --- a/arch/x86/kernel/iris.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Eurobraille/Iris power off support. - * - * Eurobraille's Iris machine is a PC with no APM or ACPI support. - * It is shutdown by a special I/O sequence which this module provides. - * - * Copyright (C) Shérab - * - * This program is free software ; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation ; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY ; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with the program ; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#define IRIS_GIO_BASE 0x340 -#define IRIS_GIO_INPUT IRIS_GIO_BASE -#define IRIS_GIO_OUTPUT (IRIS_GIO_BASE + 1) -#define IRIS_GIO_PULSE 0x80 /* First byte to send */ -#define IRIS_GIO_REST 0x00 /* Second byte to send */ -#define IRIS_GIO_NODEV 0xff /* Likely not an Iris */ - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Sébastien Hinderer "); -MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille"); -MODULE_SUPPORTED_DEVICE("Eurobraille/Iris"); - -static int force; - -module_param(force, bool, 0); -MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation."); - -static void (*old_pm_power_off)(void); - -static void iris_power_off(void) -{ - outb(IRIS_GIO_PULSE, IRIS_GIO_OUTPUT); - msleep(850); - outb(IRIS_GIO_REST, IRIS_GIO_OUTPUT); -} - -/* - * Before installing the power_off handler, try to make sure the OS is - * running on an Iris. Since Iris does not support DMI, this is done - * by reading its input port and seeing whether the read value is - * meaningful. - */ -static int iris_init(void) -{ - unsigned char status; - if (force != 1) { - printk(KERN_ERR "The force parameter has not been set to 1 so the Iris poweroff handler will not be installed.\n"); - return -ENODEV; - } - status = inb(IRIS_GIO_INPUT); - if (status == IRIS_GIO_NODEV) { - printk(KERN_ERR "This machine does not seem to be an Iris. Power_off handler not installed.\n"); - return -ENODEV; - } - old_pm_power_off = pm_power_off; - pm_power_off = &iris_power_off; - printk(KERN_INFO "Iris power_off handler installed.\n"); - - return 0; -} - -static void iris_exit(void) -{ - pm_power_off = old_pm_power_off; - printk(KERN_INFO "Iris power_off handler uninstalled.\n"); -} - -module_init(iris_init); -module_exit(iris_exit); -- cgit v1.2.2 From 08ec0c58fb8a05d3191d5cb6f5d6f81adb419798 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 27 Jul 2010 17:00:00 -0700 Subject: x86: Improve TSC calibration using a delayed workqueue Boot to boot the TSC calibration may vary by quite a large amount. While normal variance of 50-100ppm can easily be seen, the quick calibration code only requires 500ppm accuracy, which is the limit of what NTP can correct for. This can cause problems for systems being used as NTP servers, as every time they reboot it can take hours for them to calculate the new drift error caused by the calibration. The classic trade-off here is calibration accuracy vs slow boot times, as during the calibration nothing else can run. This patch uses a delayed workqueue to calibrate the TSC over the period of a second. This allows very accurate calibration (in my tests only varying by 1khz or 0.4ppm boot to boot). Additionally this refined calibration step does not block the boot process, and only delays the TSC clocksoure registration by a few seconds in early boot. If the refined calibration strays 1% from the early boot calibration value, the system will fall back to already calculated early boot calibration. Credit to Andi Kleen who suggested using a timer quite awhile back, but I dismissed it thinking the timer calibration would be done after the clocksource was registered (which would break things). Forgive me for my short-sightedness. This patch has worked very well in my testing, but TSC hardware is quite varied so it would probably be good to get some extended testing, possibly pushing inclusion out to 2.6.39. Signed-off-by: John Stultz LKML-Reference: <1289003985-29060-1-git-send-email-johnstul@us.ibm.com> Reviewed-by: Thomas Gleixner CC: Thomas Gleixner CC: Ingo Molnar CC: Martin Schwidefsky CC: Clark Williams CC: Andi Kleen --- arch/x86/kernel/tsc.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 83 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index bb64beb301d9..dc1393e7cbfb 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -888,7 +888,82 @@ __cpuinit int unsynchronized_tsc(void) return 0; } -static void __init init_tsc_clocksource(void) + +static void tsc_refine_calibration_work(struct work_struct *work); +static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); +/** + * tsc_refine_calibration_work - Further refine tsc freq calibration + * @work - ignored. + * + * This functions uses delayed work over a period of a + * second to further refine the TSC freq value. Since this is + * timer based, instead of loop based, we don't block the boot + * process while this longer calibration is done. + * + * If there are any calibration anomolies (too many SMIs, etc), + * or the refined calibration is off by 1% of the fast early + * calibration, we throw out the new calibration and use the + * early calibration. + */ +static void tsc_refine_calibration_work(struct work_struct *work) +{ + static u64 tsc_start = -1, ref_start; + static int hpet; + u64 tsc_stop, ref_stop, delta; + unsigned long freq; + + /* Don't bother refining TSC on unstable systems */ + if (check_tsc_unstable()) + goto out; + + /* + * Since the work is started early in boot, we may be + * delayed the first time we expire. So set the workqueue + * again once we know timers are working. + */ + if (tsc_start == -1) { + /* + * Only set hpet once, to avoid mixing hardware + * if the hpet becomes enabled later. + */ + hpet = is_hpet_enabled(); + schedule_delayed_work(&tsc_irqwork, HZ); + tsc_start = tsc_read_refs(&ref_start, hpet); + return; + } + + tsc_stop = tsc_read_refs(&ref_stop, hpet); + + /* hpet or pmtimer available ? */ + if (!hpet && !ref_start && !ref_stop) + goto out; + + /* Check, whether the sampling was disturbed by an SMI */ + if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX) + goto out; + + delta = tsc_stop - tsc_start; + delta *= 1000000LL; + if (hpet) + freq = calc_hpet_ref(delta, ref_start, ref_stop); + else + freq = calc_pmtimer_ref(delta, ref_start, ref_stop); + + /* Make sure we're within 1% */ + if (abs(tsc_khz - freq) > tsc_khz/100) + goto out; + + tsc_khz = freq; + printk(KERN_INFO "Refined TSC clocksource calibration: " + "%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000, + (unsigned long)tsc_khz % 1000); + +out: + clocksource_register_khz(&clocksource_tsc, tsc_khz); +} + + +static int __init init_tsc_clocksource(void) { if (tsc_clocksource_reliable) clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; @@ -897,8 +972,14 @@ static void __init init_tsc_clocksource(void) clocksource_tsc.rating = 0; clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; } - clocksource_register_khz(&clocksource_tsc, tsc_khz); + schedule_delayed_work(&tsc_irqwork, 0); + return 0; } +/* + * We use device_initcall here, to ensure we run after the hpet + * is fully initialized, which may occur at fs_initcall time. + */ +device_initcall(init_tsc_clocksource); void __init tsc_init(void) { @@ -952,6 +1033,5 @@ void __init tsc_init(void) mark_tsc_unstable("TSCs unsynchronized"); check_system_tsc_reliable(); - init_tsc_clocksource(); } -- cgit v1.2.2 From a38c5380ef9f088be9f49b6e4c5d80af8b1b5cd4 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 26 Nov 2010 17:50:20 +0100 Subject: x86: io_apic: Split setup_ioapic_ids_from_mpc() Sodaville needs to setup the IO_APIC ids as the boot loader leaves them uninitialized. Split out the setter function so it can be called unconditionally from the sodaville board code. Signed-off-by: Sebastian Andrzej Siewior Cc: Yinghai Lu LKML-Reference: <20101126165020.GA26361@www.tglx.de> Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ce3c6fb4f357..4f026a632c95 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1934,8 +1934,7 @@ void disable_IO_APIC(void) * * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 */ - -void __init setup_ioapic_ids_from_mpc(void) +void __init setup_ioapic_ids_from_mpc_nocheck(void) { union IO_APIC_reg_00 reg_00; physid_mask_t phys_id_present_map; @@ -1944,15 +1943,6 @@ void __init setup_ioapic_ids_from_mpc(void) unsigned char old_id; unsigned long flags; - if (acpi_ioapic) - return; - /* - * Don't check I/O APIC IDs for xAPIC systems. They have - * no meaning without the serial APIC bus. - */ - if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return; /* * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. @@ -2006,7 +1996,6 @@ void __init setup_ioapic_ids_from_mpc(void) physids_or(phys_id_present_map, phys_id_present_map, tmp); } - /* * We need to adjust the IRQ routing table * if the ID changed. @@ -2042,6 +2031,21 @@ void __init setup_ioapic_ids_from_mpc(void) apic_printk(APIC_VERBOSE, " ok.\n"); } } + +void __init setup_ioapic_ids_from_mpc(void) +{ + + if (acpi_ioapic) + return; + /* + * Don't check I/O APIC IDs for xAPIC systems. They have + * no meaning without the serial APIC bus. + */ + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return; + setup_ioapic_ids_from_mpc_nocheck(); +} #endif int no_timer_check __initdata; -- cgit v1.2.2 From e4d2ebcab11b308b5b59073578efd33eccd55d46 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Fri, 3 Dec 2010 11:51:38 +0800 Subject: x86, apbt: Setup affinity for apb timers acting as per-cpu timer Commit a5ef2e70 "x86: Sanitize apb timer interrupt handling" forgot the affinity setup when cleaning up the code, this patch just adds the forgotten part Signed-off-by: Feng Tang Cc: Jacob Pan Cc: Alan Cox LKML-Reference: <1291348298-21263-2-git-send-email-feng.tang@intel.com> Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apb_timer.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index 92543c73cf8e..7c9ab59653e8 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -315,6 +315,7 @@ static void apbt_setup_irq(struct apbt_dev *adev) if (system_state == SYSTEM_BOOTING) { irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT); + irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); /* APB timer irqs are set up as mp_irqs, timer is edge type */ __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge"); if (request_irq(adev->irq, apbt_interrupt_handler, -- cgit v1.2.2 From 991cfffa7c19aa648546aff666595af896e568ba Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Fri, 3 Dec 2010 11:51:37 +0800 Subject: x86, earlyprintk: Move mrst early console to platform/ and fix a typo Move the code to arch/x86/platform/mrst/. Also fix a typo to use the correct config option: ONFIG_EARLY_PRINTK_MRST Signed-off-by: Feng Tang Cc: alan@linux.intel.com LKML-Reference: <1291348298-21263-1-git-send-email-feng.tang@intel.com> Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 1 - arch/x86/kernel/early_printk.c | 3 +- arch/x86/kernel/early_printk_mrst.c | 319 ------------------------------------ 3 files changed, 1 insertion(+), 322 deletions(-) delete mode 100644 arch/x86/kernel/early_printk_mrst.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9e13763b6092..f60153d5de57 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -84,7 +84,6 @@ obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_VM86) += vm86_32.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_APB_TIMER) += apb_timer.o diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 4572f25f9325..cd28a350f7f9 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -240,7 +240,7 @@ static int __init setup_early_printk(char *buf) if (!strncmp(buf, "xen", 3)) early_console_register(&xenboot_console, keep); #endif -#ifdef CONFIG_X86_MRST_EARLY_PRINTK +#ifdef CONFIG_EARLY_PRINTK_MRST if (!strncmp(buf, "mrst", 4)) { mrst_early_console_init(); early_console_register(&early_mrst_console, keep); @@ -250,7 +250,6 @@ static int __init setup_early_printk(char *buf) hsu_early_console_init(); early_console_register(&early_hsu_console, keep); } - #endif buf++; } diff --git a/arch/x86/kernel/early_printk_mrst.c b/arch/x86/kernel/early_printk_mrst.c deleted file mode 100644 index 65df603622b2..000000000000 --- a/arch/x86/kernel/early_printk_mrst.c +++ /dev/null @@ -1,319 +0,0 @@ -/* - * early_printk_mrst.c - early consoles for Intel MID platforms - * - * Copyright (c) 2008-2010, Intel Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -/* - * This file implements two early consoles named mrst and hsu. - * mrst is based on Maxim3110 spi-uart device, it exists in both - * Moorestown and Medfield platforms, while hsu is based on a High - * Speed UART device which only exists in the Medfield platform - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#define MRST_SPI_TIMEOUT 0x200000 -#define MRST_REGBASE_SPI0 0xff128000 -#define MRST_REGBASE_SPI1 0xff128400 -#define MRST_CLK_SPI0_REG 0xff11d86c - -/* Bit fields in CTRLR0 */ -#define SPI_DFS_OFFSET 0 - -#define SPI_FRF_OFFSET 4 -#define SPI_FRF_SPI 0x0 -#define SPI_FRF_SSP 0x1 -#define SPI_FRF_MICROWIRE 0x2 -#define SPI_FRF_RESV 0x3 - -#define SPI_MODE_OFFSET 6 -#define SPI_SCPH_OFFSET 6 -#define SPI_SCOL_OFFSET 7 -#define SPI_TMOD_OFFSET 8 -#define SPI_TMOD_TR 0x0 /* xmit & recv */ -#define SPI_TMOD_TO 0x1 /* xmit only */ -#define SPI_TMOD_RO 0x2 /* recv only */ -#define SPI_TMOD_EPROMREAD 0x3 /* eeprom read mode */ - -#define SPI_SLVOE_OFFSET 10 -#define SPI_SRL_OFFSET 11 -#define SPI_CFS_OFFSET 12 - -/* Bit fields in SR, 7 bits */ -#define SR_MASK 0x7f /* cover 7 bits */ -#define SR_BUSY (1 << 0) -#define SR_TF_NOT_FULL (1 << 1) -#define SR_TF_EMPT (1 << 2) -#define SR_RF_NOT_EMPT (1 << 3) -#define SR_RF_FULL (1 << 4) -#define SR_TX_ERR (1 << 5) -#define SR_DCOL (1 << 6) - -struct dw_spi_reg { - u32 ctrl0; - u32 ctrl1; - u32 ssienr; - u32 mwcr; - u32 ser; - u32 baudr; - u32 txfltr; - u32 rxfltr; - u32 txflr; - u32 rxflr; - u32 sr; - u32 imr; - u32 isr; - u32 risr; - u32 txoicr; - u32 rxoicr; - u32 rxuicr; - u32 msticr; - u32 icr; - u32 dmacr; - u32 dmatdlr; - u32 dmardlr; - u32 idr; - u32 version; - - /* Currently operates as 32 bits, though only the low 16 bits matter */ - u32 dr; -} __packed; - -#define dw_readl(dw, name) __raw_readl(&(dw)->name) -#define dw_writel(dw, name, val) __raw_writel((val), &(dw)->name) - -/* Default use SPI0 register for mrst, we will detect Penwell and use SPI1 */ -static unsigned long mrst_spi_paddr = MRST_REGBASE_SPI0; - -static u32 *pclk_spi0; -/* Always contains an accessable address, start with 0 */ -static struct dw_spi_reg *pspi; - -static struct kmsg_dumper dw_dumper; -static int dumper_registered; - -static void dw_kmsg_dump(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason, - const char *s1, unsigned long l1, - const char *s2, unsigned long l2) -{ - int i; - - /* When run to this, we'd better re-init the HW */ - mrst_early_console_init(); - - for (i = 0; i < l1; i++) - early_mrst_console.write(&early_mrst_console, s1 + i, 1); - for (i = 0; i < l2; i++) - early_mrst_console.write(&early_mrst_console, s2 + i, 1); -} - -/* Set the ratio rate to 115200, 8n1, IRQ disabled */ -static void max3110_write_config(void) -{ - u16 config; - - config = 0xc001; - dw_writel(pspi, dr, config); -} - -/* Translate char to a eligible word and send to max3110 */ -static void max3110_write_data(char c) -{ - u16 data; - - data = 0x8000 | c; - dw_writel(pspi, dr, data); -} - -void mrst_early_console_init(void) -{ - u32 ctrlr0 = 0; - u32 spi0_cdiv; - u32 freq; /* Freqency info only need be searched once */ - - /* Base clk is 100 MHz, the actual clk = 100M / (clk_divider + 1) */ - pclk_spi0 = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, - MRST_CLK_SPI0_REG); - spi0_cdiv = ((*pclk_spi0) & 0xe00) >> 9; - freq = 100000000 / (spi0_cdiv + 1); - - if (mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL) - mrst_spi_paddr = MRST_REGBASE_SPI1; - - pspi = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, - mrst_spi_paddr); - - /* Disable SPI controller */ - dw_writel(pspi, ssienr, 0); - - /* Set control param, 8 bits, transmit only mode */ - ctrlr0 = dw_readl(pspi, ctrl0); - - ctrlr0 &= 0xfcc0; - ctrlr0 |= 0xf | (SPI_FRF_SPI << SPI_FRF_OFFSET) - | (SPI_TMOD_TO << SPI_TMOD_OFFSET); - dw_writel(pspi, ctrl0, ctrlr0); - - /* - * Change the spi0 clk to comply with 115200 bps, use 100000 to - * calculate the clk dividor to make the clock a little slower - * than real baud rate. - */ - dw_writel(pspi, baudr, freq/100000); - - /* Disable all INT for early phase */ - dw_writel(pspi, imr, 0x0); - - /* Set the cs to spi-uart */ - dw_writel(pspi, ser, 0x2); - - /* Enable the HW, the last step for HW init */ - dw_writel(pspi, ssienr, 0x1); - - /* Set the default configuration */ - max3110_write_config(); - - /* Register the kmsg dumper */ - if (!dumper_registered) { - dw_dumper.dump = dw_kmsg_dump; - kmsg_dump_register(&dw_dumper); - dumper_registered = 1; - } -} - -/* Slave select should be called in the read/write function */ -static void early_mrst_spi_putc(char c) -{ - unsigned int timeout; - u32 sr; - - timeout = MRST_SPI_TIMEOUT; - /* Early putc needs to make sure the TX FIFO is not full */ - while (--timeout) { - sr = dw_readl(pspi, sr); - if (!(sr & SR_TF_NOT_FULL)) - cpu_relax(); - else - break; - } - - if (!timeout) - pr_warning("MRST earlycon: timed out\n"); - else - max3110_write_data(c); -} - -/* Early SPI only uses polling mode */ -static void early_mrst_spi_write(struct console *con, const char *str, unsigned n) -{ - int i; - - for (i = 0; i < n && *str; i++) { - if (*str == '\n') - early_mrst_spi_putc('\r'); - early_mrst_spi_putc(*str); - str++; - } -} - -struct console early_mrst_console = { - .name = "earlymrst", - .write = early_mrst_spi_write, - .flags = CON_PRINTBUFFER, - .index = -1, -}; - -/* - * Following is the early console based on Medfield HSU (High - * Speed UART) device. - */ -#define HSU_PORT2_PADDR 0xffa28180 - -static void __iomem *phsu; - -void hsu_early_console_init(void) -{ - u8 lcr; - - phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, - HSU_PORT2_PADDR); - - /* Disable FIFO */ - writeb(0x0, phsu + UART_FCR); - - /* Set to default 115200 bps, 8n1 */ - lcr = readb(phsu + UART_LCR); - writeb((0x80 | lcr), phsu + UART_LCR); - writeb(0x18, phsu + UART_DLL); - writeb(lcr, phsu + UART_LCR); - writel(0x3600, phsu + UART_MUL*4); - - writeb(0x8, phsu + UART_MCR); - writeb(0x7, phsu + UART_FCR); - writeb(0x3, phsu + UART_LCR); - - /* Clear IRQ status */ - readb(phsu + UART_LSR); - readb(phsu + UART_RX); - readb(phsu + UART_IIR); - readb(phsu + UART_MSR); - - /* Enable FIFO */ - writeb(0x7, phsu + UART_FCR); -} - -#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) - -static void early_hsu_putc(char ch) -{ - unsigned int timeout = 10000; /* 10ms */ - u8 status; - - while (--timeout) { - status = readb(phsu + UART_LSR); - if (status & BOTH_EMPTY) - break; - udelay(1); - } - - /* Only write the char when there was no timeout */ - if (timeout) - writeb(ch, phsu + UART_TX); -} - -static void early_hsu_write(struct console *con, const char *str, unsigned n) -{ - int i; - - for (i = 0; i < n && *str; i++) { - if (*str == '\n') - early_hsu_putc('\r'); - early_hsu_putc(*str); - str++; - } -} - -struct console early_hsu_console = { - .name = "earlyhsu", - .write = early_hsu_write, - .flags = CON_PRINTBUFFER, - .index = -1, -}; -- cgit v1.2.2 From a8760eca6cf60ed303ad494ef45901f63165d2c8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 13 Dec 2010 11:28:02 +0100 Subject: x86: Check tsc available/disabled in the delayed init function The delayed TSC init function does not check whether the system has no TSC or TSC is disabled at the kernel command line, which results in a crash in the work queue based extended calibration due to division by zero because the basic calibration never happened. Add the missing checks and do not touch TSC when not available or disabled. Signed-off-by: Thomas Gleixner Cc: John Stultz --- arch/x86/kernel/tsc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index dc1393e7cbfb..356a0d455cf9 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -965,6 +965,9 @@ out: static int __init init_tsc_clocksource(void) { + if (!cpu_has_tsc || tsc_disabled > 0) + return 0; + if (tsc_clocksource_reliable) clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; /* lower the rating if we already know its unstable: */ -- cgit v1.2.2 From 3fb82d56ad003e804923185316236f26b30dfdd5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 23 Nov 2010 16:11:40 -0800 Subject: x86, suspend: Avoid unnecessary smp alternatives switch during suspend/resume During suspend, we disable all the non boot cpus. And during resume we bring them all back again. So no need to do alternatives_smp_switch() in between. On my core 2 based laptop, this speeds up the suspend path by 15msec and the resume path by 5 msec (suspend/resume speed up differences can be attributed to the different P-states that the cpu is in during suspend/resume). Signed-off-by: Suresh Siddha LKML-Reference: <1290557500.4946.8.camel@sbsiddha-MOBL3.sc.intel.com> Cc: Rafael J. Wysocki Signed-off-by: H. Peter Anvin --- arch/x86/kernel/alternative.c | 3 ++- arch/x86/kernel/smpboot.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 5079f24c955a..9f98eb400fef 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -353,6 +353,7 @@ void __init_or_module alternatives_smp_module_del(struct module *mod) mutex_unlock(&smp_alt); } +bool skip_smp_alternatives; void alternatives_smp_switch(int smp) { struct smp_alt_module *mod; @@ -368,7 +369,7 @@ void alternatives_smp_switch(int smp) printk("lockdep: fixing up alternatives.\n"); #endif - if (noreplace_smp || smp_alt_once) + if (noreplace_smp || smp_alt_once || skip_smp_alternatives) return; BUG_ON(!smp && (num_online_cpus() > 1)); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 083e99d1b7df..837c81e99edf 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1166,6 +1166,20 @@ out: preempt_enable(); } +void arch_disable_nonboot_cpus_begin(void) +{ + /* + * Avoid the smp alternatives switch during the disable_nonboot_cpus(). + * In the suspend path, we will be back in the SMP mode shortly anyways. + */ + skip_smp_alternatives = true; +} + +void arch_disable_nonboot_cpus_end(void) +{ + skip_smp_alternatives = false; +} + void arch_enable_nonboot_cpus_begin(void) { set_mtrr_aps_delayed_init(); -- cgit v1.2.2 From e681041388e61ecd7f99dba66b3c1db11a564d92 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Tue, 30 Nov 2010 13:55:39 -0600 Subject: x86, UV: Add common uv_early_read_mmr() function for reading MMRs Early in boot, reading MMRs from the UV hub controller require calls to early_ioremap()/early_iounmap(). Rather than duplicating code, add a common function to do the map/read/unmap. Signed-off-by: Jack Steiner LKML-Reference: <20101130195926.834804371@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index c1c52c341f40..0c3675f0474f 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -48,6 +48,16 @@ unsigned int uv_apicid_hibits; EXPORT_SYMBOL_GPL(uv_apicid_hibits); static DEFINE_SPINLOCK(uv_nmi_lock); +static unsigned long __init uv_early_read_mmr(unsigned long addr) +{ + unsigned long val, *mmr; + + mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr)); + val = *mmr; + early_iounmap(mmr, sizeof(*mmr)); + return val; +} + static inline bool is_GRU_range(u64 start, u64 end) { return start >= gru_start_paddr && end <= gru_end_paddr; @@ -58,16 +68,12 @@ static bool uv_is_untracked_pat_range(u64 start, u64 end) return is_ISA_range(start, end) || is_GRU_range(start, end); } -static int early_get_nodeid(void) +static int __init early_get_nodeid(void) { union uvh_node_id_u node_id; - unsigned long *mmr; - - mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr)); - node_id.v = *mmr; - early_iounmap(mmr, sizeof(*mmr)); /* Currently, all blades have same revision number */ + node_id.v = uv_early_read_mmr(UVH_NODE_ID); uv_min_hub_revision_id = node_id.s.revision; return node_id.s.node_id; @@ -75,11 +81,7 @@ static int early_get_nodeid(void) static void __init early_get_apic_pnode_shift(void) { - unsigned long *mmr; - - mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_APICID, sizeof(*mmr)); - uvh_apicid.v = *mmr; - early_iounmap(mmr, sizeof(*mmr)); + uvh_apicid.v = uv_early_read_mmr(UVH_APICID); if (!uvh_apicid.v) /* * Old bios, use default value @@ -95,12 +97,8 @@ static void __init early_get_apic_pnode_shift(void) static void __init uv_set_apicid_hibit(void) { union uvh_lb_target_physical_apic_id_mask_u apicid_mask; - unsigned long *mmr; - mmr = early_ioremap(UV_LOCAL_MMR_BASE | - UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr)); - apicid_mask.v = *mmr; - early_iounmap(mmr, sizeof(*mmr)); + apicid_mask.v = uv_early_read_mmr(UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK); uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK; } -- cgit v1.2.2 From d8850ba425d9823d3184bd52f065899dac4689f9 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Tue, 30 Nov 2010 13:55:40 -0600 Subject: x86, UV: Fix the effect of extra bits in the hub nodeid register UV systems can be partitioned into multiple independent SSIs. Large partitioned systems may have extra bits in the node_id register. These bits are used when the total memory on all SSIs exceeds 16TB. These extra bits need to be ignored when calculating x2apic_extra_bits. Signed-off-by: Jack Steiner LKML-Reference: <20101130195926.972776133@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 0c3675f0474f..2a3f2a7db243 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -68,15 +68,19 @@ static bool uv_is_untracked_pat_range(u64 start, u64 end) return is_ISA_range(start, end) || is_GRU_range(start, end); } -static int __init early_get_nodeid(void) +static int __init early_get_pnodeid(void) { union uvh_node_id_u node_id; + union uvh_rh_gam_config_mmr_u m_n_config; + int pnode; /* Currently, all blades have same revision number */ node_id.v = uv_early_read_mmr(UVH_NODE_ID); + m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR); uv_min_hub_revision_id = node_id.s.revision; - return node_id.s.node_id; + pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1); + return pnode; } static void __init early_get_apic_pnode_shift(void) @@ -104,10 +108,10 @@ static void __init uv_set_apicid_hibit(void) static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - int nodeid; + int pnodeid; if (!strcmp(oem_id, "SGI")) { - nodeid = early_get_nodeid(); + pnodeid = early_get_pnodeid(); early_get_apic_pnode_shift(); x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; x86_platform.nmi_init = uv_nmi_init; @@ -117,7 +121,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) uv_system_type = UV_X2APIC; else if (!strcmp(oem_table_id, "UVH")) { __get_cpu_var(x2apic_extra_bits) = - nodeid << (uvh_apicid.s.pnode_shift - 1); + pnodeid << uvh_apicid.s.pnode_shift; uv_system_type = UV_NON_UNIQUE_APIC; uv_set_apicid_hibit(); return 1; @@ -680,27 +684,32 @@ void uv_nmi_init(void) void __init uv_system_init(void) { union uvh_rh_gam_config_mmr_u m_n_config; + union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; union uvh_node_id_u node_id; unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; - int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; + int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io; int gnode_extra, max_pnode = 0; unsigned long mmr_base, present, paddr; - unsigned short pnode_mask; + unsigned short pnode_mask, pnode_io_mask; map_low_mmrs(); m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); m_val = m_n_config.s.m_skt; n_val = m_n_config.s.n_skt; + mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); + n_io = mmioh.s.n_io; mmr_base = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & ~UV_MMR_ENABLE; pnode_mask = (1 << n_val) - 1; + pnode_io_mask = (1 << n_io) - 1; + node_id.v = uv_read_local_mmr(UVH_NODE_ID); gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; gnode_upper = ((unsigned long)gnode_extra << m_val); - printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n", - n_val, m_val, gnode_upper, gnode_extra); + printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n", + n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask); printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); @@ -733,7 +742,7 @@ void __init uv_system_init(void) for (j = 0; j < 64; j++) { if (!test_bit(j, &present)) continue; - pnode = (i * 64 + j); + pnode = (i * 64 + j) & pnode_mask; uv_blade_info[blade].pnode = pnode; uv_blade_info[blade].nr_possible_cpus = 0; uv_blade_info[blade].nr_online_cpus = 0; @@ -754,6 +763,7 @@ void __init uv_system_init(void) /* * apic_pnode_shift must be set before calling uv_apicid_to_pnode(); */ + uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; pnode = uv_apicid_to_pnode(apicid); blade = boot_pnode_to_blade(pnode); @@ -770,7 +780,6 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->numa_blade_id = blade; uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; uv_cpu_hub_info(cpu)->pnode = pnode; - uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1; uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; @@ -794,7 +803,7 @@ void __init uv_system_init(void) map_gru_high(max_pnode); map_mmr_high(max_pnode); - map_mmioh_high(max_pnode); + map_mmioh_high(max_pnode & pnode_io_mask); uv_cpu_init(); uv_scir_register_cpu_notifier(); -- cgit v1.2.2 From 56d91f132c9be66e98cce1b1e77a28027048bb26 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 16 Dec 2010 19:09:24 -0800 Subject: x86, acpi: Add MAX_LOCAL_APIC for 32bit We should use MAX_LOCAL_APIC for max apic ids and MAX_APICS as number of local apics. Also apic_version[] array should use MAX_LOCAL_APICs. Signed-off-by: Yinghai Lu LKML-Reference: <4D0AD464.2020408@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 463839645f9b..0d5d07f2253e 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1707,7 +1707,7 @@ void __init init_apic_mappings(void) * This initializes the IO-APIC and APIC hardware if this is * a UP kernel. */ -int apic_version[MAX_APICS]; +int apic_version[MAX_LOCAL_APIC]; int __init APIC_init_uniprocessor(void) { -- cgit v1.2.2 From d3bd058826aa8b79590cca6c8e6d1557bf576ada Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 16 Dec 2010 19:09:58 -0800 Subject: x86, acpi: Parse all SRAT cpu entries even above the cpu number limitation Recent Intel new system have different order in MADT, aka will list all thread0 at first, then all thread1. But SRAT table still old order, it will list cpus in one socket all together. If the user have compiled limited NR_CPUS or boot with nr_cpus=, could have missed to put some cpus apic id to node mapping into apicid_to_node[]. for example for 4 sockets system with 64 cpus with nr_cpus=32 will get crash... [ 9.106288] Total of 32 processors activated (136190.88 BogoMIPS). [ 9.235021] divide error: 0000 [#1] SMP [ 9.235315] last sysfs file: [ 9.235481] CPU 1 [ 9.235592] Modules linked in: [ 9.245398] [ 9.245478] Pid: 2, comm: kthreadd Not tainted 2.6.37-rc1-tip-yh-01782-ge92ef79-dirty #274 /Sun Fire x4800 [ 9.265415] RIP: 0010:[] [] select_task_rq_fair+0x4f0/0x623 ... [ 9.645938] RIP [] select_task_rq_fair+0x4f0/0x623 [ 9.665356] RSP [ 9.665568] ---[ end trace 2296156d35fdfc87 ]--- So let just parse all cpu entries in SRAT. Also add apicid checking with MAX_LOCAL_APIC, in case We could out of boundaries of apicid_to_node[]. it fixes following bug too. https://bugzilla.kernel.org/show_bug.cgi?id=22662 -v2: expand to 32bit according to hpa need to add MAX_LOCAL_APIC for 32bit Reported-and-Tested-by: Wu Fengguang Reported-by: Bjorn Helgaas Tested-by: Myron Stowe Signed-off-by: Yinghai Lu LKML-Reference: <4D0AD486.9020704@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/acpi/boot.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c05872aa3ce0..f19d6679600f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -198,6 +198,11 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled) { unsigned int ver = 0; + if (id >= (MAX_LOCAL_APIC-1)) { + printk(KERN_INFO PREFIX "skipped apicid that is too big\n"); + return; + } + if (!enabled) { ++disabled_cpus; return; -- cgit v1.2.2 From 9e76a97efd31a08cb19d0ba12013b8fb4ad3e474 Mon Sep 17 00:00:00 2001 From: "R, Durgadoss" Date: Mon, 3 Jan 2011 17:22:04 +0530 Subject: x86, hwmon: Add core threshold notification to therm_throt.c This patch adds code to therm_throt.c to notify core thermal threshold events. These thresholds are supported by the IA32_THERM_INTERRUPT register. The status/log for the same is monitored using the IA32_THERM_STATUS register. The necessary #defines are in msr-index.h. A call back is added to mce.h, to further notify the thermal stack, about the threshold events. Signed-off-by: Durgadoss R LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/therm_throt.c | 40 ++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 4b683267eca5..e12246ff5aa6 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -53,8 +53,13 @@ struct thermal_state { struct _thermal_state core_power_limit; struct _thermal_state package_throttle; struct _thermal_state package_power_limit; + struct _thermal_state core_thresh0; + struct _thermal_state core_thresh1; }; +/* Callback to handle core threshold interrupts */ +int (*platform_thermal_notify)(__u64 msr_val); + static DEFINE_PER_CPU(struct thermal_state, thermal_state); static atomic_t therm_throt_en = ATOMIC_INIT(0); @@ -200,6 +205,22 @@ static int therm_throt_process(bool new_event, int event, int level) return 0; } +static int thresh_event_valid(int event) +{ + struct _thermal_state *state; + unsigned int this_cpu = smp_processor_id(); + struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu); + u64 now = get_jiffies_64(); + + state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1; + + if (time_before64(now, state->next_check)) + return 0; + + state->next_check = now + CHECK_INTERVAL; + return 1; +} + #ifdef CONFIG_SYSFS /* Add/Remove thermal_throttle interface for CPU device: */ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, @@ -313,6 +334,22 @@ device_initcall(thermal_throttle_init_device); #define PACKAGE_THROTTLED ((__u64)2 << 62) #define PACKAGE_POWER_LIMIT ((__u64)3 << 62) +static void notify_thresholds(__u64 msr_val) +{ + /* check whether the interrupt handler is defined; + * otherwise simply return + */ + if (!platform_thermal_notify) + return; + + /* lower threshold reached */ + if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0)) + platform_thermal_notify(msr_val); + /* higher threshold reached */ + if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1)) + platform_thermal_notify(msr_val); +} + /* Thermal transition interrupt handler */ static void intel_thermal_interrupt(void) { @@ -321,6 +358,9 @@ static void intel_thermal_interrupt(void) rdmsrl(MSR_IA32_THERM_STATUS, msr_val); + /* Check for violation of core thermal thresholds*/ + notify_thresholds(msr_val); + if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, THERMAL_THROTTLING_EVENT, CORE_LEVEL) != 0) -- cgit v1.2.2 From d50d8fe192428090790e7178e9507e981e0b005b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 4 Jan 2011 17:20:54 +1030 Subject: x86, mm: Initialize initial_page_table before paravirt jumps v2.6.36-rc8-54-gb40827f (x86-32, mm: Add an initial page table for core bootstrapping) made x86 boot using initial_page_table and broke lguest. For 2.6.37 we simply cut & paste the initialization code into lguest (da32dac10126 "lguest: populate initial_page_table"), now we fix it properly by doing that initialization before the paravirt jump. Signed-off-by: Rusty Russell Acked-by: Jeremy Fitzhardinge Cc: lguest Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra LKML-Reference: <201101041720.54535.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 77 ++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 38 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index c0dbd9ac24f0..e3a8bb91e168 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -139,39 +139,6 @@ ENTRY(startup_32) movl %eax, pa(olpc_ofw_pgd) #endif -#ifdef CONFIG_PARAVIRT - /* This is can only trip for a broken bootloader... */ - cmpw $0x207, pa(boot_params + BP_version) - jb default_entry - - /* Paravirt-compatible boot parameters. Look to see what architecture - we're booting under. */ - movl pa(boot_params + BP_hardware_subarch), %eax - cmpl $num_subarch_entries, %eax - jae bad_subarch - - movl pa(subarch_entries)(,%eax,4), %eax - subl $__PAGE_OFFSET, %eax - jmp *%eax - -bad_subarch: -WEAK(lguest_entry) -WEAK(xen_entry) - /* Unknown implementation; there's really - nothing we can do at this point. */ - ud2a - - __INITDATA - -subarch_entries: - .long default_entry /* normal x86/PC */ - .long lguest_entry /* lguest hypervisor */ - .long xen_entry /* Xen hypervisor */ - .long default_entry /* Moorestown MID */ -num_subarch_entries = (. - subarch_entries) / 4 -.previous -#endif /* CONFIG_PARAVIRT */ - /* * Initialize page tables. This creates a PDE and a set of page * tables, which are located immediately beyond __brk_base. The variable @@ -181,7 +148,6 @@ num_subarch_entries = (. - subarch_entries) / 4 * * Note that the stack is not yet set up! */ -default_entry: #ifdef CONFIG_X86_PAE /* @@ -261,7 +227,42 @@ page_pde_offset = (__PAGE_OFFSET >> 20); movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax movl %eax,pa(initial_page_table+0xffc) #endif - jmp 3f + +#ifdef CONFIG_PARAVIRT + /* This is can only trip for a broken bootloader... */ + cmpw $0x207, pa(boot_params + BP_version) + jb default_entry + + /* Paravirt-compatible boot parameters. Look to see what architecture + we're booting under. */ + movl pa(boot_params + BP_hardware_subarch), %eax + cmpl $num_subarch_entries, %eax + jae bad_subarch + + movl pa(subarch_entries)(,%eax,4), %eax + subl $__PAGE_OFFSET, %eax + jmp *%eax + +bad_subarch: +WEAK(lguest_entry) +WEAK(xen_entry) + /* Unknown implementation; there's really + nothing we can do at this point. */ + ud2a + + __INITDATA + +subarch_entries: + .long default_entry /* normal x86/PC */ + .long lguest_entry /* lguest hypervisor */ + .long xen_entry /* Xen hypervisor */ + .long default_entry /* Moorestown MID */ +num_subarch_entries = (. - subarch_entries) / 4 +.previous +#else + jmp default_entry +#endif /* CONFIG_PARAVIRT */ + /* * Non-boot CPU entry point; entered from trampoline.S * We can't lgdt here, because lgdt itself uses a data segment, but @@ -282,7 +283,7 @@ ENTRY(startup_32_smp) movl %eax,%fs movl %eax,%gs #endif /* CONFIG_SMP */ -3: +default_entry: /* * New page tables may be in 4Mbyte page mode and may @@ -622,13 +623,13 @@ ENTRY(initial_code) __PAGE_ALIGNED_BSS .align PAGE_SIZE_asm #ifdef CONFIG_X86_PAE -ENTRY(initial_pg_pmd) +initial_pg_pmd: .fill 1024*KPMDS,4,0 #else ENTRY(initial_page_table) .fill 1024,4,0 #endif -ENTRY(initial_pg_fixmap) +initial_pg_fixmap: .fill 1024,4,0 ENTRY(empty_zero_page) .fill 4096,1,0 -- cgit v1.2.2 From cb2ded37fd2e1039f96c8c892da024a8f033add5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 4 Jan 2011 16:38:52 -0800 Subject: x86: Fix APIC ID sizing bug on larger systems, clean up MAX_APICS confusion Found one x2apic pre-enabled system, x2apic_mode suddenly get corrupted after register some cpus, when compiled CONFIG_NR_CPUS=255 instead of 512. It turns out that generic_processor_info() ==> phyid_set(apicid, phys_cpu_present_map) causes the problem. phys_cpu_present_map is sized by MAX_APICS bits, and pre-enabled system some cpus have an apic id > 255. The variable after phys_cpu_present_map may get corrupted silently: ffffffff828e8420 B phys_cpu_present_map ffffffff828e8440 B apic_verbosity ffffffff828e8444 B local_apic_timer_c2_ok ffffffff828e8448 B disable_apic ffffffff828e844c B x2apic_mode ffffffff828e8450 B x2apic_disabled ffffffff828e8454 B num_processors ... Actually phys_cpu_present_map is referenced via apic id, instead index. We should use MAX_LOCAL_APIC instead MAX_APICS. For 64-bit it will be 32768 in all cases. BSS will increase by 4k bytes on 64-bit: text data bss dec filename 21696943 4193748 12787712 38678403 vmlinux.before 21696943 4193748 12791808 38682499 vmlinux.after No change on 32bit. Finally we can remove MAX_APCIS that was rather confusing. Signed-off-by: Yinghai Lu Cc: H. Peter Anvin Cc: "Eric W. Biederman" LKML-Reference: <4D23BD9C.3070102@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 6 +++--- arch/x86/kernel/apic/io_apic.c | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 7235e5fbdb6d..17c8090fabd4 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -915,13 +915,13 @@ static int __init acpi_parse_madt_lapic_entries(void) acpi_register_lapic_address(acpi_lapic_addr); count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, - acpi_parse_sapic, MAX_APICS); + acpi_parse_sapic, MAX_LOCAL_APIC); if (!count) { x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, - acpi_parse_x2apic, MAX_APICS); + acpi_parse_x2apic, MAX_LOCAL_APIC); count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, - acpi_parse_lapic, MAX_APICS); + acpi_parse_lapic, MAX_LOCAL_APIC); } if (!count && !x2count) { printk(KERN_ERR PREFIX "No LAPIC entries present\n"); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 943d814ef8e4..2fc696e4d565 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -4109,7 +4109,8 @@ void __init pre_init_apic_IRQ0(void) printk(KERN_INFO "Early APIC setup for system timer0\n"); #ifndef CONFIG_SMP - phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); + physid_set_mask_of_physid(boot_cpu_physical_apicid, + &phys_cpu_present_map); #endif /* Make sure the irq descriptor is set up */ cfg = alloc_irq_and_cfg_at(0, 0); -- cgit v1.2.2