diff options
author | holt@sgi.com <holt@sgi.com> | 2008-04-03 16:17:13 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2008-04-08 16:51:35 -0400 |
commit | 2c6e6db41f01b6b4eb98809350827c9678996698 (patch) | |
tree | 00438344c0ad599c1301db2abe32a4c2ee89b607 | |
parent | 41bd26d67c41e325c6b9e56aadfe9dad8af9a565 (diff) |
[IA64] Minimize per_cpu reservations.
This patch significantly shrinks boot memory allocation on ia64.
It does this by not allocating per_cpu areas for cpus that can never
exist.
In the case where ACPI does not provide any NUMA node description of the
cpus, I defaulted to assigning the first 32 cpus round-robin across the
known nodes. For the !CONFIG_ACPI case I used for_each_possible_cpu().
Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r-- | arch/ia64/kernel/acpi.c | 4 | ||||
-rw-r--r-- | arch/ia64/kernel/numa.c | 2 | ||||
-rw-r--r-- | arch/ia64/kernel/setup.c | 2 | ||||
-rw-r--r-- | arch/ia64/mm/discontig.c | 12 | ||||
-rw-r--r-- | arch/ia64/mm/numa.c | 4 | ||||
-rw-r--r-- | include/asm-ia64/acpi.h | 33 | ||||
-rw-r--r-- | include/asm-ia64/numa.h | 2 |
7 files changed, 48 insertions, 11 deletions
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 78f28d825f30..c7467f863c7a 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c | |||
@@ -423,6 +423,7 @@ static u32 __devinitdata pxm_flag[PXM_FLAG_LEN]; | |||
423 | #define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag)) | 423 | #define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag)) |
424 | #define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag)) | 424 | #define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag)) |
425 | static struct acpi_table_slit __initdata *slit_table; | 425 | static struct acpi_table_slit __initdata *slit_table; |
426 | cpumask_t early_cpu_possible_map = CPU_MASK_NONE; | ||
426 | 427 | ||
427 | static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa) | 428 | static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa) |
428 | { | 429 | { |
@@ -482,6 +483,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | |||
482 | (pa->apic_id << 8) | (pa->local_sapic_eid); | 483 | (pa->apic_id << 8) | (pa->local_sapic_eid); |
483 | /* nid should be overridden as logical node id later */ | 484 | /* nid should be overridden as logical node id later */ |
484 | node_cpuid[srat_num_cpus].nid = pxm; | 485 | node_cpuid[srat_num_cpus].nid = pxm; |
486 | cpu_set(srat_num_cpus, early_cpu_possible_map); | ||
485 | srat_num_cpus++; | 487 | srat_num_cpus++; |
486 | } | 488 | } |
487 | 489 | ||
@@ -559,7 +561,7 @@ void __init acpi_numa_arch_fixup(void) | |||
559 | } | 561 | } |
560 | 562 | ||
561 | /* set logical node id in cpu structure */ | 563 | /* set logical node id in cpu structure */ |
562 | for (i = 0; i < srat_num_cpus; i++) | 564 | for_each_possible_early_cpu(i) |
563 | node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid); | 565 | node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid); |
564 | 566 | ||
565 | printk(KERN_INFO "Number of logical nodes in system = %d\n", | 567 | printk(KERN_INFO "Number of logical nodes in system = %d\n", |
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c index a78b45f5fe2f..c93420c97409 100644 --- a/arch/ia64/kernel/numa.c +++ b/arch/ia64/kernel/numa.c | |||
@@ -73,7 +73,7 @@ void __init build_cpu_to_node_map(void) | |||
73 | for(node=0; node < MAX_NUMNODES; node++) | 73 | for(node=0; node < MAX_NUMNODES; node++) |
74 | cpus_clear(node_to_cpu_mask[node]); | 74 | cpus_clear(node_to_cpu_mask[node]); |
75 | 75 | ||
76 | for(cpu = 0; cpu < NR_CPUS; ++cpu) { | 76 | for_each_possible_early_cpu(cpu) { |
77 | node = -1; | 77 | node = -1; |
78 | for (i = 0; i < NR_CPUS; ++i) | 78 | for (i = 0; i < NR_CPUS; ++i) |
79 | if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) { | 79 | if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) { |
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 4aa9eaea76c3..6206541f9e87 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c | |||
@@ -493,6 +493,8 @@ setup_arch (char **cmdline_p) | |||
493 | acpi_table_init(); | 493 | acpi_table_init(); |
494 | # ifdef CONFIG_ACPI_NUMA | 494 | # ifdef CONFIG_ACPI_NUMA |
495 | acpi_numa_init(); | 495 | acpi_numa_init(); |
496 | per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ? | ||
497 | 32 : cpus_weight(early_cpu_possible_map)), additional_cpus); | ||
496 | # endif | 498 | # endif |
497 | #else | 499 | #else |
498 | # ifdef CONFIG_SMP | 500 | # ifdef CONFIG_SMP |
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 06c540a29467..6136a4c6df11 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c | |||
@@ -104,7 +104,7 @@ static int __meminit early_nr_cpus_node(int node) | |||
104 | { | 104 | { |
105 | int cpu, n = 0; | 105 | int cpu, n = 0; |
106 | 106 | ||
107 | for (cpu = 0; cpu < NR_CPUS; cpu++) | 107 | for_each_possible_early_cpu(cpu) |
108 | if (node == node_cpuid[cpu].nid) | 108 | if (node == node_cpuid[cpu].nid) |
109 | n++; | 109 | n++; |
110 | 110 | ||
@@ -143,7 +143,7 @@ static void *per_cpu_node_setup(void *cpu_data, int node) | |||
143 | #ifdef CONFIG_SMP | 143 | #ifdef CONFIG_SMP |
144 | int cpu; | 144 | int cpu; |
145 | 145 | ||
146 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | 146 | for_each_possible_early_cpu(cpu) { |
147 | if (node == node_cpuid[cpu].nid) { | 147 | if (node == node_cpuid[cpu].nid) { |
148 | memcpy(__va(cpu_data), __phys_per_cpu_start, | 148 | memcpy(__va(cpu_data), __phys_per_cpu_start, |
149 | __per_cpu_end - __per_cpu_start); | 149 | __per_cpu_end - __per_cpu_start); |
@@ -346,7 +346,7 @@ static void __init initialize_pernode_data(void) | |||
346 | 346 | ||
347 | #ifdef CONFIG_SMP | 347 | #ifdef CONFIG_SMP |
348 | /* Set the node_data pointer for each per-cpu struct */ | 348 | /* Set the node_data pointer for each per-cpu struct */ |
349 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | 349 | for_each_possible_early_cpu(cpu) { |
350 | node = node_cpuid[cpu].nid; | 350 | node = node_cpuid[cpu].nid; |
351 | per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data; | 351 | per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data; |
352 | } | 352 | } |
@@ -494,13 +494,9 @@ void __cpuinit *per_cpu_init(void) | |||
494 | int cpu; | 494 | int cpu; |
495 | static int first_time = 1; | 495 | static int first_time = 1; |
496 | 496 | ||
497 | |||
498 | if (smp_processor_id() != 0) | ||
499 | return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; | ||
500 | |||
501 | if (first_time) { | 497 | if (first_time) { |
502 | first_time = 0; | 498 | first_time = 0; |
503 | for (cpu = 0; cpu < NR_CPUS; cpu++) | 499 | for_each_possible_early_cpu(cpu) |
504 | per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; | 500 | per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; |
505 | } | 501 | } |
506 | 502 | ||
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index 7807fc5c0422..b73bf1838e57 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c | |||
@@ -27,7 +27,9 @@ | |||
27 | */ | 27 | */ |
28 | int num_node_memblks; | 28 | int num_node_memblks; |
29 | struct node_memblk_s node_memblk[NR_NODE_MEMBLKS]; | 29 | struct node_memblk_s node_memblk[NR_NODE_MEMBLKS]; |
30 | struct node_cpuid_s node_cpuid[NR_CPUS]; | 30 | struct node_cpuid_s node_cpuid[NR_CPUS] = |
31 | { [0 ... NR_CPUS-1] = { .phys_id = 0, .nid = NUMA_NO_NODE } }; | ||
32 | |||
31 | /* | 33 | /* |
32 | * This is a matrix with "distances" between nodes, they should be | 34 | * This is a matrix with "distances" between nodes, they should be |
33 | * proportional to the memory access latency ratios. | 35 | * proportional to the memory access latency ratios. |
diff --git a/include/asm-ia64/acpi.h b/include/asm-ia64/acpi.h index cd1cc39b5599..fcfad326f4c7 100644 --- a/include/asm-ia64/acpi.h +++ b/include/asm-ia64/acpi.h | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/init.h> | 35 | #include <linux/init.h> |
36 | #include <linux/numa.h> | 36 | #include <linux/numa.h> |
37 | #include <asm/system.h> | 37 | #include <asm/system.h> |
38 | #include <asm/numa.h> | ||
38 | 39 | ||
39 | #define COMPILER_DEPENDENT_INT64 long | 40 | #define COMPILER_DEPENDENT_INT64 long |
40 | #define COMPILER_DEPENDENT_UINT64 unsigned long | 41 | #define COMPILER_DEPENDENT_UINT64 unsigned long |
@@ -115,7 +116,11 @@ extern unsigned int is_cpu_cpei_target(unsigned int cpu); | |||
115 | extern void set_cpei_target_cpu(unsigned int cpu); | 116 | extern void set_cpei_target_cpu(unsigned int cpu); |
116 | extern unsigned int get_cpei_target_cpu(void); | 117 | extern unsigned int get_cpei_target_cpu(void); |
117 | extern void prefill_possible_map(void); | 118 | extern void prefill_possible_map(void); |
119 | #ifdef CONFIG_ACPI_HOTPLUG_CPU | ||
118 | extern int additional_cpus; | 120 | extern int additional_cpus; |
121 | #else | ||
122 | #define additional_cpus 0 | ||
123 | #endif | ||
119 | 124 | ||
120 | #ifdef CONFIG_ACPI_NUMA | 125 | #ifdef CONFIG_ACPI_NUMA |
121 | #if MAX_NUMNODES > 256 | 126 | #if MAX_NUMNODES > 256 |
@@ -129,6 +134,34 @@ extern int __initdata nid_to_pxm_map[MAX_NUMNODES]; | |||
129 | 134 | ||
130 | #define acpi_unlazy_tlb(x) | 135 | #define acpi_unlazy_tlb(x) |
131 | 136 | ||
137 | #ifdef CONFIG_ACPI_NUMA | ||
138 | extern cpumask_t early_cpu_possible_map; | ||
139 | #define for_each_possible_early_cpu(cpu) \ | ||
140 | for_each_cpu_mask((cpu), early_cpu_possible_map) | ||
141 | |||
142 | static inline void per_cpu_scan_finalize(int min_cpus, int reserve_cpus) | ||
143 | { | ||
144 | int low_cpu, high_cpu; | ||
145 | int cpu; | ||
146 | int next_nid = 0; | ||
147 | |||
148 | low_cpu = cpus_weight(early_cpu_possible_map); | ||
149 | |||
150 | high_cpu = max(low_cpu, min_cpus); | ||
151 | high_cpu = min(high_cpu + reserve_cpus, NR_CPUS); | ||
152 | |||
153 | for (cpu = low_cpu; cpu < high_cpu; cpu++) { | ||
154 | cpu_set(cpu, early_cpu_possible_map); | ||
155 | if (node_cpuid[cpu].nid == NUMA_NO_NODE) { | ||
156 | node_cpuid[cpu].nid = next_nid; | ||
157 | next_nid++; | ||
158 | if (next_nid >= num_online_nodes()) | ||
159 | next_nid = 0; | ||
160 | } | ||
161 | } | ||
162 | } | ||
163 | #endif /* CONFIG_ACPI_NUMA */ | ||
164 | |||
132 | #endif /*__KERNEL__*/ | 165 | #endif /*__KERNEL__*/ |
133 | 166 | ||
134 | #endif /*_ASM_ACPI_H*/ | 167 | #endif /*_ASM_ACPI_H*/ |
diff --git a/include/asm-ia64/numa.h b/include/asm-ia64/numa.h index 6a8a27cfae3e..3499ff57bf42 100644 --- a/include/asm-ia64/numa.h +++ b/include/asm-ia64/numa.h | |||
@@ -22,6 +22,8 @@ | |||
22 | 22 | ||
23 | #include <asm/mmzone.h> | 23 | #include <asm/mmzone.h> |
24 | 24 | ||
25 | #define NUMA_NO_NODE -1 | ||
26 | |||
25 | extern u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned; | 27 | extern u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned; |
26 | extern cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; | 28 | extern cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; |
27 | extern pg_data_t *pgdat_list[MAX_NUMNODES]; | 29 | extern pg_data_t *pgdat_list[MAX_NUMNODES]; |