aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteffen Persvold <sp@numascale.com>2011-12-05 11:07:26 -0500
committerIngo Molnar <mingo@elte.hu>2011-12-05 11:17:24 -0500
commit44b111b519160e33fdc41eadb39af86a24707edf (patch)
treec69d9b0022c410fbdb484c71181498bad9db2581
parent64be4c1c2428e148de6081af235e2418e6a66dda (diff)
x86: Add NumaChip support
Adds support for Numascale NumaChip large-SMP systems. It is needed to enable the booting of more than ~168 cores. v2: - [Steffen] enumerate only accessible northbridges - [Daniel] rediffed and validated against 3.1-rc10 v3: - [Daniel] use x86_init core numbering override - [Daniel] cleanups as per feedback v4: - [Daniel] use updated x86_cpuinit override v5: - drop disabling interrupts locally, as ISR write is atomic; drop delay - added read-mostly annotations where appropriate - require CONFIG_SMP, so drop conditional path Workload tested on 96 cores/16 sockets. Signed-off-by: Steffen Persvold <sp@numascale.com> Signed-off-by: Daniel J Blueman <daniel@numascale-asia.com> Cc: Jesse Barnes <jbarnes@virtuousgeek.org> Link: http://lkml.kernel.org/r/1323101246-2400-1-git-send-email-daniel@numascale-asia.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/x86/Kconfig13
-rw-r--r--arch/x86/include/asm/numachip/numachip_csr.h167
-rw-r--r--arch/x86/kernel/apic/Makefile1
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c294
4 files changed, 475 insertions, 0 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cb9a1044a771..7b9eaa1ae10b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -343,6 +343,7 @@ config X86_EXTENDED_PLATFORM
343 343
344 If you enable this option then you'll be able to select support 344 If you enable this option then you'll be able to select support
345 for the following (non-PC) 64 bit x86 platforms: 345 for the following (non-PC) 64 bit x86 platforms:
346 Numascale NumaChip
346 ScaleMP vSMP 347 ScaleMP vSMP
347 SGI Ultraviolet 348 SGI Ultraviolet
348 349
@@ -351,6 +352,18 @@ config X86_EXTENDED_PLATFORM
351endif 352endif
352# This is an alphabetically sorted list of 64 bit extended platforms 353# This is an alphabetically sorted list of 64 bit extended platforms
353# Please maintain the alphabetic order if and when there are additions 354# Please maintain the alphabetic order if and when there are additions
355config X86_NUMACHIP
356 bool "Numascale NumaChip"
357 depends on X86_64
358 depends on X86_EXTENDED_PLATFORM
359 depends on NUMA
360 depends on SMP
361 depends on X86_X2APIC
362 depends on !EDAC_AMD64
363 ---help---
364 Adds support for Numascale NumaChip large-SMP systems. Needed to
365 enable more than ~168 cores.
366 If you don't have one of these, you should say N here.
354 367
355config X86_VSMP 368config X86_VSMP
356 bool "ScaleMP vSMP" 369 bool "ScaleMP vSMP"
diff --git a/arch/x86/include/asm/numachip/numachip_csr.h b/arch/x86/include/asm/numachip/numachip_csr.h
new file mode 100644
index 000000000000..660f843df928
--- /dev/null
+++ b/arch/x86/include/asm/numachip/numachip_csr.h
@@ -0,0 +1,167 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Numascale NumaConnect-Specific Header file
7 *
8 * Copyright (C) 2011 Numascale AS. All rights reserved.
9 *
10 * Send feedback to <support@numascale.com>
11 *
12 */
13
14#ifndef _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
15#define _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
16
17#include <linux/numa.h>
18#include <linux/percpu.h>
19#include <linux/io.h>
20#include <linux/swab.h>
21#include <asm/types.h>
22#include <asm/processor.h>
23
24#define CSR_NODE_SHIFT 16
25#define CSR_NODE_BITS(p) (((unsigned long)(p)) << CSR_NODE_SHIFT)
26#define CSR_NODE_MASK 0x0fff /* 4K nodes */
27
28/* 32K CSR space, b15 indicates geo/non-geo */
29#define CSR_OFFSET_MASK 0x7fffUL
30
31/* Global CSR space covers all 4K possible nodes with 64K CSR space per node */
32#define NUMACHIP_GCSR_BASE 0x3fff00000000ULL
33#define NUMACHIP_GCSR_LIM 0x3fff0fffffffULL
34#define NUMACHIP_GCSR_SIZE (NUMACHIP_GCSR_LIM - NUMACHIP_GCSR_BASE + 1)
35
36/*
37 * Local CSR space starts in global CSR space with "nodeid" = 0xfff0, however
38 * when using the direct mapping on x86_64, both start and size needs to be
39 * aligned with PMD_SIZE which is 2M
40 */
41#define NUMACHIP_LCSR_BASE 0x3ffffe000000ULL
42#define NUMACHIP_LCSR_LIM 0x3fffffffffffULL
43#define NUMACHIP_LCSR_SIZE (NUMACHIP_LCSR_LIM - NUMACHIP_LCSR_BASE + 1)
44
45static inline void *gcsr_address(int node, unsigned long offset)
46{
47 return __va(NUMACHIP_GCSR_BASE | (1UL << 15) |
48 CSR_NODE_BITS(node & CSR_NODE_MASK) | (offset & CSR_OFFSET_MASK));
49}
50
51static inline void *lcsr_address(unsigned long offset)
52{
53 return __va(NUMACHIP_LCSR_BASE | (1UL << 15) |
54 CSR_NODE_BITS(0xfff0) | (offset & CSR_OFFSET_MASK));
55}
56
57static inline unsigned int read_gcsr(int node, unsigned long offset)
58{
59 return swab32(readl(gcsr_address(node, offset)));
60}
61
62static inline void write_gcsr(int node, unsigned long offset, unsigned int val)
63{
64 writel(swab32(val), gcsr_address(node, offset));
65}
66
67static inline unsigned int read_lcsr(unsigned long offset)
68{
69 return swab32(readl(lcsr_address(offset)));
70}
71
72static inline void write_lcsr(unsigned long offset, unsigned int val)
73{
74 writel(swab32(val), lcsr_address(offset));
75}
76
77/* ========================================================================= */
78/* CSR_G0_STATE_CLEAR */
79/* ========================================================================= */
80
81#define CSR_G0_STATE_CLEAR (0x000 + (0 << 12))
82union numachip_csr_g0_state_clear {
83 unsigned int v;
84 struct numachip_csr_g0_state_clear_s {
85 unsigned int _state:2;
86 unsigned int _rsvd_2_6:5;
87 unsigned int _lost:1;
88 unsigned int _rsvd_8_31:24;
89 } s;
90};
91
92/* ========================================================================= */
93/* CSR_G0_NODE_IDS */
94/* ========================================================================= */
95
96#define CSR_G0_NODE_IDS (0x008 + (0 << 12))
97union numachip_csr_g0_node_ids {
98 unsigned int v;
99 struct numachip_csr_g0_node_ids_s {
100 unsigned int _initialid:16;
101 unsigned int _nodeid:12;
102 unsigned int _rsvd_28_31:4;
103 } s;
104};
105
106/* ========================================================================= */
107/* CSR_G3_EXT_IRQ_GEN */
108/* ========================================================================= */
109
110#define CSR_G3_EXT_IRQ_GEN (0x030 + (3 << 12))
111union numachip_csr_g3_ext_irq_gen {
112 unsigned int v;
113 struct numachip_csr_g3_ext_irq_gen_s {
114 unsigned int _vector:8;
115 unsigned int _msgtype:3;
116 unsigned int _index:5;
117 unsigned int _destination_apic_id:16;
118 } s;
119};
120
121/* ========================================================================= */
122/* CSR_G3_EXT_IRQ_STATUS */
123/* ========================================================================= */
124
125#define CSR_G3_EXT_IRQ_STATUS (0x034 + (3 << 12))
126union numachip_csr_g3_ext_irq_status {
127 unsigned int v;
128 struct numachip_csr_g3_ext_irq_status_s {
129 unsigned int _result:32;
130 } s;
131};
132
133/* ========================================================================= */
134/* CSR_G3_EXT_IRQ_DEST */
135/* ========================================================================= */
136
137#define CSR_G3_EXT_IRQ_DEST (0x038 + (3 << 12))
138union numachip_csr_g3_ext_irq_dest {
139 unsigned int v;
140 struct numachip_csr_g3_ext_irq_dest_s {
141 unsigned int _irq:8;
142 unsigned int _rsvd_8_31:24;
143 } s;
144};
145
146/* ========================================================================= */
147/* CSR_G3_NC_ATT_MAP_SELECT */
148/* ========================================================================= */
149
150#define CSR_G3_NC_ATT_MAP_SELECT (0x7fc + (3 << 12))
151union numachip_csr_g3_nc_att_map_select {
152 unsigned int v;
153 struct numachip_csr_g3_nc_att_map_select_s {
154 unsigned int _upper_address_bits:4;
155 unsigned int _select_ram:4;
156 unsigned int _rsvd_8_31:24;
157 } s;
158};
159
160/* ========================================================================= */
161/* CSR_G3_NC_ATT_MAP_SELECT_0-255 */
162/* ========================================================================= */
163
164#define CSR_G3_NC_ATT_MAP_SELECT_0 (0x800 + (3 << 12))
165
166#endif /* _ASM_X86_NUMACHIP_NUMACHIP_CSR_H */
167
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 767fd04f2843..0ae0323b1f9c 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_SMP) += ipi.o
10 10
11ifeq ($(CONFIG_X86_64),y) 11ifeq ($(CONFIG_X86_64),y)
12# APIC probe will depend on the listing order here 12# APIC probe will depend on the listing order here
13obj-$(CONFIG_X86_NUMACHIP) += apic_numachip.o
13obj-$(CONFIG_X86_UV) += x2apic_uv_x.o 14obj-$(CONFIG_X86_UV) += x2apic_uv_x.o
14obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o 15obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o
15obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o 16obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
new file mode 100644
index 000000000000..09d3d8c1cd99
--- /dev/null
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -0,0 +1,294 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Numascale NumaConnect-Specific APIC Code
7 *
8 * Copyright (C) 2011 Numascale AS. All rights reserved.
9 *
10 * Send feedback to <support@numascale.com>
11 *
12 */
13
14#include <linux/errno.h>
15#include <linux/threads.h>
16#include <linux/cpumask.h>
17#include <linux/string.h>
18#include <linux/kernel.h>
19#include <linux/module.h>
20#include <linux/ctype.h>
21#include <linux/init.h>
22#include <linux/hardirq.h>
23#include <linux/delay.h>
24
25#include <asm/numachip/numachip_csr.h>
26#include <asm/smp.h>
27#include <asm/apic.h>
28#include <asm/ipi.h>
29#include <asm/apic_flat_64.h>
30
31static int numachip_system __read_mostly;
32
33static struct apic apic_numachip __read_mostly;
34
35static unsigned int get_apic_id(unsigned long x)
36{
37 unsigned long value;
38 unsigned int id;
39
40 rdmsrl(MSR_FAM10H_NODE_ID, value);
41 id = ((x >> 24) & 0xffU) | ((value << 2) & 0x3f00U);
42
43 return id;
44}
45
46static unsigned long set_apic_id(unsigned int id)
47{
48 unsigned long x;
49
50 x = ((id & 0xffU) << 24);
51 return x;
52}
53
54static unsigned int read_xapic_id(void)
55{
56 return get_apic_id(apic_read(APIC_ID));
57}
58
59static int numachip_apic_id_registered(void)
60{
61 return physid_isset(read_xapic_id(), phys_cpu_present_map);
62}
63
64static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
65{
66 return initial_apic_id >> index_msb;
67}
68
69static const struct cpumask *numachip_target_cpus(void)
70{
71 return cpu_online_mask;
72}
73
74static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask)
75{
76 cpumask_clear(retmask);
77 cpumask_set_cpu(cpu, retmask);
78}
79
80static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
81{
82 union numachip_csr_g3_ext_irq_gen int_gen;
83
84 int_gen.s._destination_apic_id = phys_apicid;
85 int_gen.s._vector = 0;
86 int_gen.s._msgtype = APIC_DM_INIT >> 8;
87 int_gen.s._index = 0;
88
89 write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
90
91 int_gen.s._msgtype = APIC_DM_STARTUP >> 8;
92 int_gen.s._vector = start_rip >> 12;
93
94 write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
95
96 atomic_set(&init_deasserted, 1);
97 return 0;
98}
99
100static void numachip_send_IPI_one(int cpu, int vector)
101{
102 union numachip_csr_g3_ext_irq_gen int_gen;
103 int apicid = per_cpu(x86_cpu_to_apicid, cpu);
104
105 int_gen.s._destination_apic_id = apicid;
106 int_gen.s._vector = vector;
107 int_gen.s._msgtype = (vector == NMI_VECTOR ? APIC_DM_NMI : APIC_DM_FIXED) >> 8;
108 int_gen.s._index = 0;
109
110 write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
111}
112
113static void numachip_send_IPI_mask(const struct cpumask *mask, int vector)
114{
115 unsigned int cpu;
116
117 for_each_cpu(cpu, mask)
118 numachip_send_IPI_one(cpu, vector);
119}
120
121static void numachip_send_IPI_mask_allbutself(const struct cpumask *mask,
122 int vector)
123{
124 unsigned int this_cpu = smp_processor_id();
125 unsigned int cpu;
126
127 for_each_cpu(cpu, mask) {
128 if (cpu != this_cpu)
129 numachip_send_IPI_one(cpu, vector);
130 }
131}
132
133static void numachip_send_IPI_allbutself(int vector)
134{
135 unsigned int this_cpu = smp_processor_id();
136 unsigned int cpu;
137
138 for_each_online_cpu(cpu) {
139 if (cpu != this_cpu)
140 numachip_send_IPI_one(cpu, vector);
141 }
142}
143
144static void numachip_send_IPI_all(int vector)
145{
146 numachip_send_IPI_mask(cpu_online_mask, vector);
147}
148
149static void numachip_send_IPI_self(int vector)
150{
151 __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
152}
153
154static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask)
155{
156 int cpu;
157
158 /*
159 * We're using fixed IRQ delivery, can only return one phys APIC ID.
160 * May as well be the first.
161 */
162 cpu = cpumask_first(cpumask);
163 if (likely((unsigned)cpu < nr_cpu_ids))
164 return per_cpu(x86_cpu_to_apicid, cpu);
165
166 return BAD_APICID;
167}
168
169static unsigned int
170numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
171 const struct cpumask *andmask)
172{
173 int cpu;
174
175 /*
176 * We're using fixed IRQ delivery, can only return one phys APIC ID.
177 * May as well be the first.
178 */
179 for_each_cpu_and(cpu, cpumask, andmask) {
180 if (cpumask_test_cpu(cpu, cpu_online_mask))
181 break;
182 }
183 return per_cpu(x86_cpu_to_apicid, cpu);
184}
185
186static int __init numachip_probe(void)
187{
188 return apic == &apic_numachip;
189}
190
191static void __init map_csrs(void)
192{
193 printk(KERN_INFO "NumaChip: Mapping local CSR space (%016llx - %016llx)\n",
194 NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_BASE + NUMACHIP_LCSR_SIZE - 1);
195 init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
196
197 printk(KERN_INFO "NumaChip: Mapping global CSR space (%016llx - %016llx)\n",
198 NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_BASE + NUMACHIP_GCSR_SIZE - 1);
199 init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE);
200}
201
202static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
203{
204 c->phys_proc_id = node;
205 per_cpu(cpu_llc_id, smp_processor_id()) = node;
206}
207
208static int __init numachip_system_init(void)
209{
210 unsigned int val;
211
212 if (!numachip_system)
213 return 0;
214
215 x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
216
217 map_csrs();
218
219 val = read_lcsr(CSR_G0_NODE_IDS);
220 printk(KERN_INFO "NumaChip: Local NodeID = %08x\n", val);
221
222 return 0;
223}
224early_initcall(numachip_system_init);
225
226static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
227{
228 if (!strncmp(oem_id, "NUMASC", 6)) {
229 numachip_system = 1;
230 return 1;
231 }
232
233 return 0;
234}
235
236static struct apic apic_numachip __refconst = {
237
238 .name = "NumaConnect system",
239 .probe = numachip_probe,
240 .acpi_madt_oem_check = numachip_acpi_madt_oem_check,
241 .apic_id_registered = numachip_apic_id_registered,
242
243 .irq_delivery_mode = dest_Fixed,
244 .irq_dest_mode = 0, /* physical */
245
246 .target_cpus = numachip_target_cpus,
247 .disable_esr = 0,
248 .dest_logical = 0,
249 .check_apicid_used = NULL,
250 .check_apicid_present = NULL,
251
252 .vector_allocation_domain = numachip_vector_allocation_domain,
253 .init_apic_ldr = flat_init_apic_ldr,
254
255 .ioapic_phys_id_map = NULL,
256 .setup_apic_routing = NULL,
257 .multi_timer_check = NULL,
258 .cpu_present_to_apicid = default_cpu_present_to_apicid,
259 .apicid_to_cpu_present = NULL,
260 .setup_portio_remap = NULL,
261 .check_phys_apicid_present = default_check_phys_apicid_present,
262 .enable_apic_mode = NULL,
263 .phys_pkg_id = numachip_phys_pkg_id,
264 .mps_oem_check = NULL,
265
266 .get_apic_id = get_apic_id,
267 .set_apic_id = set_apic_id,
268 .apic_id_mask = 0xffU << 24,
269
270 .cpu_mask_to_apicid = numachip_cpu_mask_to_apicid,
271 .cpu_mask_to_apicid_and = numachip_cpu_mask_to_apicid_and,
272
273 .send_IPI_mask = numachip_send_IPI_mask,
274 .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself,
275 .send_IPI_allbutself = numachip_send_IPI_allbutself,
276 .send_IPI_all = numachip_send_IPI_all,
277 .send_IPI_self = numachip_send_IPI_self,
278
279 .wakeup_secondary_cpu = numachip_wakeup_secondary,
280 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
281 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
282 .wait_for_init_deassert = NULL,
283 .smp_callin_clear_local_apic = NULL,
284 .inquire_remote_apic = NULL, /* REMRD not supported */
285
286 .read = native_apic_mem_read,
287 .write = native_apic_mem_write,
288 .icr_read = native_apic_icr_read,
289 .icr_write = native_apic_icr_write,
290 .wait_icr_idle = native_apic_wait_icr_idle,
291 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
292};
293apic_driver(apic_numachip);
294