aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorholt@sgi.com <holt@sgi.com>2008-04-03 16:17:13 -0400
committerTony Luck <tony.luck@intel.com>2008-04-08 16:51:35 -0400
commit2c6e6db41f01b6b4eb98809350827c9678996698 (patch)
tree00438344c0ad599c1301db2abe32a4c2ee89b607
parent41bd26d67c41e325c6b9e56aadfe9dad8af9a565 (diff)
[IA64] Minimize per_cpu reservations.
This patch significantly shrinks boot memory allocation on ia64. It does this by not allocating per_cpu areas for cpus that can never exist. In the case where ACPI does not provide any NUMA node description of the cpus, I defaulted to assigning the first 32 cpus round-robin across the known nodes. For the !CONFIG_ACPI case, I used for_each_possible_cpu(). Signed-off-by: Robin Holt <holt@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r--arch/ia64/kernel/acpi.c4
-rw-r--r--arch/ia64/kernel/numa.c2
-rw-r--r--arch/ia64/kernel/setup.c2
-rw-r--r--arch/ia64/mm/discontig.c12
-rw-r--r--arch/ia64/mm/numa.c4
-rw-r--r--include/asm-ia64/acpi.h33
-rw-r--r--include/asm-ia64/numa.h2
7 files changed, 48 insertions, 11 deletions
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 78f28d825f30..c7467f863c7a 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -423,6 +423,7 @@ static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
423#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag)) 423#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag))
424#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag)) 424#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag))
425static struct acpi_table_slit __initdata *slit_table; 425static struct acpi_table_slit __initdata *slit_table;
426cpumask_t early_cpu_possible_map = CPU_MASK_NONE;
426 427
427static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa) 428static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
428{ 429{
@@ -482,6 +483,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
482 (pa->apic_id << 8) | (pa->local_sapic_eid); 483 (pa->apic_id << 8) | (pa->local_sapic_eid);
483 /* nid should be overridden as logical node id later */ 484 /* nid should be overridden as logical node id later */
484 node_cpuid[srat_num_cpus].nid = pxm; 485 node_cpuid[srat_num_cpus].nid = pxm;
486 cpu_set(srat_num_cpus, early_cpu_possible_map);
485 srat_num_cpus++; 487 srat_num_cpus++;
486} 488}
487 489
@@ -559,7 +561,7 @@ void __init acpi_numa_arch_fixup(void)
559 } 561 }
560 562
561 /* set logical node id in cpu structure */ 563 /* set logical node id in cpu structure */
562 for (i = 0; i < srat_num_cpus; i++) 564 for_each_possible_early_cpu(i)
563 node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid); 565 node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid);
564 566
565 printk(KERN_INFO "Number of logical nodes in system = %d\n", 567 printk(KERN_INFO "Number of logical nodes in system = %d\n",
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index a78b45f5fe2f..c93420c97409 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -73,7 +73,7 @@ void __init build_cpu_to_node_map(void)
73 for(node=0; node < MAX_NUMNODES; node++) 73 for(node=0; node < MAX_NUMNODES; node++)
74 cpus_clear(node_to_cpu_mask[node]); 74 cpus_clear(node_to_cpu_mask[node]);
75 75
76 for(cpu = 0; cpu < NR_CPUS; ++cpu) { 76 for_each_possible_early_cpu(cpu) {
77 node = -1; 77 node = -1;
78 for (i = 0; i < NR_CPUS; ++i) 78 for (i = 0; i < NR_CPUS; ++i)
79 if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) { 79 if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 4aa9eaea76c3..6206541f9e87 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -493,6 +493,8 @@ setup_arch (char **cmdline_p)
493 acpi_table_init(); 493 acpi_table_init();
494# ifdef CONFIG_ACPI_NUMA 494# ifdef CONFIG_ACPI_NUMA
495 acpi_numa_init(); 495 acpi_numa_init();
496 per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ?
497 32 : cpus_weight(early_cpu_possible_map)), additional_cpus);
496# endif 498# endif
497#else 499#else
498# ifdef CONFIG_SMP 500# ifdef CONFIG_SMP
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 06c540a29467..6136a4c6df11 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -104,7 +104,7 @@ static int __meminit early_nr_cpus_node(int node)
104{ 104{
105 int cpu, n = 0; 105 int cpu, n = 0;
106 106
107 for (cpu = 0; cpu < NR_CPUS; cpu++) 107 for_each_possible_early_cpu(cpu)
108 if (node == node_cpuid[cpu].nid) 108 if (node == node_cpuid[cpu].nid)
109 n++; 109 n++;
110 110
@@ -143,7 +143,7 @@ static void *per_cpu_node_setup(void *cpu_data, int node)
143#ifdef CONFIG_SMP 143#ifdef CONFIG_SMP
144 int cpu; 144 int cpu;
145 145
146 for (cpu = 0; cpu < NR_CPUS; cpu++) { 146 for_each_possible_early_cpu(cpu) {
147 if (node == node_cpuid[cpu].nid) { 147 if (node == node_cpuid[cpu].nid) {
148 memcpy(__va(cpu_data), __phys_per_cpu_start, 148 memcpy(__va(cpu_data), __phys_per_cpu_start,
149 __per_cpu_end - __per_cpu_start); 149 __per_cpu_end - __per_cpu_start);
@@ -346,7 +346,7 @@ static void __init initialize_pernode_data(void)
346 346
347#ifdef CONFIG_SMP 347#ifdef CONFIG_SMP
348 /* Set the node_data pointer for each per-cpu struct */ 348 /* Set the node_data pointer for each per-cpu struct */
349 for (cpu = 0; cpu < NR_CPUS; cpu++) { 349 for_each_possible_early_cpu(cpu) {
350 node = node_cpuid[cpu].nid; 350 node = node_cpuid[cpu].nid;
351 per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data; 351 per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;
352 } 352 }
@@ -494,13 +494,9 @@ void __cpuinit *per_cpu_init(void)
494 int cpu; 494 int cpu;
495 static int first_time = 1; 495 static int first_time = 1;
496 496
497
498 if (smp_processor_id() != 0)
499 return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
500
501 if (first_time) { 497 if (first_time) {
502 first_time = 0; 498 first_time = 0;
503 for (cpu = 0; cpu < NR_CPUS; cpu++) 499 for_each_possible_early_cpu(cpu)
504 per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; 500 per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
505 } 501 }
506 502
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
index 7807fc5c0422..b73bf1838e57 100644
--- a/arch/ia64/mm/numa.c
+++ b/arch/ia64/mm/numa.c
@@ -27,7 +27,9 @@
27 */ 27 */
28int num_node_memblks; 28int num_node_memblks;
29struct node_memblk_s node_memblk[NR_NODE_MEMBLKS]; 29struct node_memblk_s node_memblk[NR_NODE_MEMBLKS];
30struct node_cpuid_s node_cpuid[NR_CPUS]; 30struct node_cpuid_s node_cpuid[NR_CPUS] =
31 { [0 ... NR_CPUS-1] = { .phys_id = 0, .nid = NUMA_NO_NODE } };
32
31/* 33/*
32 * This is a matrix with "distances" between nodes, they should be 34 * This is a matrix with "distances" between nodes, they should be
33 * proportional to the memory access latency ratios. 35 * proportional to the memory access latency ratios.
diff --git a/include/asm-ia64/acpi.h b/include/asm-ia64/acpi.h
index cd1cc39b5599..fcfad326f4c7 100644
--- a/include/asm-ia64/acpi.h
+++ b/include/asm-ia64/acpi.h
@@ -35,6 +35,7 @@
35#include <linux/init.h> 35#include <linux/init.h>
36#include <linux/numa.h> 36#include <linux/numa.h>
37#include <asm/system.h> 37#include <asm/system.h>
38#include <asm/numa.h>
38 39
39#define COMPILER_DEPENDENT_INT64 long 40#define COMPILER_DEPENDENT_INT64 long
40#define COMPILER_DEPENDENT_UINT64 unsigned long 41#define COMPILER_DEPENDENT_UINT64 unsigned long
@@ -115,7 +116,11 @@ extern unsigned int is_cpu_cpei_target(unsigned int cpu);
115extern void set_cpei_target_cpu(unsigned int cpu); 116extern void set_cpei_target_cpu(unsigned int cpu);
116extern unsigned int get_cpei_target_cpu(void); 117extern unsigned int get_cpei_target_cpu(void);
117extern void prefill_possible_map(void); 118extern void prefill_possible_map(void);
119#ifdef CONFIG_ACPI_HOTPLUG_CPU
118extern int additional_cpus; 120extern int additional_cpus;
121#else
122#define additional_cpus 0
123#endif
119 124
120#ifdef CONFIG_ACPI_NUMA 125#ifdef CONFIG_ACPI_NUMA
121#if MAX_NUMNODES > 256 126#if MAX_NUMNODES > 256
@@ -129,6 +134,34 @@ extern int __initdata nid_to_pxm_map[MAX_NUMNODES];
129 134
130#define acpi_unlazy_tlb(x) 135#define acpi_unlazy_tlb(x)
131 136
137#ifdef CONFIG_ACPI_NUMA
/*
 * early_cpu_possible_map is the cpumask defined in arch/ia64/kernel/acpi.c
 * (initialised to CPU_MASK_NONE); a bit is set for each cpu recorded while
 * parsing SRAT processor affinity entries, and more bits may be added by
 * per_cpu_scan_finalize().
 *
 * for_each_possible_early_cpu(cpu) is a thin wrapper around
 * for_each_cpu_mask() over that mask, intended for early-boot loops that
 * previously walked every index in 0..NR_CPUS-1.
 */
138extern cpumask_t early_cpu_possible_map;
139#define for_each_possible_early_cpu(cpu) \
140 for_each_cpu_mask((cpu), early_cpu_possible_map)
141
/*
 * per_cpu_scan_finalize - grow early_cpu_possible_map and make sure every
 * cpu added to it has a node id.
 *
 * @min_cpus:     lower bound on the number of cpus the map should cover.
 * @reserve_cpus: extra cpus to add on top of that bound (the caller in
 *                setup_arch() passes additional_cpus).
 *
 * The upper bound is max(cpus already in the map, min_cpus) + reserve_cpus,
 * clamped to NR_CPUS.  Each cpu index from the current weight of the map up
 * to that bound is set in the map; if its node_cpuid[].nid is still
 * NUMA_NO_NODE it is assigned a node id round-robin over
 * 0..num_online_nodes()-1.
 *
 * NOTE(review): starting the loop at cpus_weight() assumes the bits already
 * set in early_cpu_possible_map form the contiguous range 0..low_cpu-1 --
 * confirm against the code that populates the map.
 */
142static inline void per_cpu_scan_finalize(int min_cpus, int reserve_cpus)
143{
144 int low_cpu, high_cpu;
145 int cpu;
146 int next_nid = 0;
147
/* Number of cpus already recorded in the early map. */
148 low_cpu = cpus_weight(early_cpu_possible_map);
149
/* Cover at least min_cpus, add the reserve, never exceed NR_CPUS. */
150 high_cpu = max(low_cpu, min_cpus);
151 high_cpu = min(high_cpu + reserve_cpus, NR_CPUS);
152
153 for (cpu = low_cpu; cpu < high_cpu; cpu++) {
154 cpu_set(cpu, early_cpu_possible_map);
/* Only cpus without a node id get one; existing ids are kept. */
155 if (node_cpuid[cpu].nid == NUMA_NO_NODE) {
156 node_cpuid[cpu].nid = next_nid;
157 next_nid++;
/* Wrap around so assignments cycle through the online nodes. */
158 if (next_nid >= num_online_nodes())
159 next_nid = 0;
160 }
161 }
162}
163#endif /* CONFIG_ACPI_NUMA */
164
132#endif /*__KERNEL__*/ 165#endif /*__KERNEL__*/
133 166
134#endif /*_ASM_ACPI_H*/ 167#endif /*_ASM_ACPI_H*/
diff --git a/include/asm-ia64/numa.h b/include/asm-ia64/numa.h
index 6a8a27cfae3e..3499ff57bf42 100644
--- a/include/asm-ia64/numa.h
+++ b/include/asm-ia64/numa.h
@@ -22,6 +22,8 @@
22 22
23#include <asm/mmzone.h> 23#include <asm/mmzone.h>
24 24
25#define NUMA_NO_NODE -1
26
25extern u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned; 27extern u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
26extern cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; 28extern cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
27extern pg_data_t *pgdat_list[MAX_NUMNODES]; 29extern pg_data_t *pgdat_list[MAX_NUMNODES];