aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Russ Anderson <rja@sgi.com> 2013-04-29 18:07:59 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-04-29 18:54:35 -0400
commit: 7c243c7168dcc1bc2081fc0494923cd7cc808fb6 (patch)
tree: 6d29d01bdf62c64bf430187368bdef21f3919c4a
parent: fed5b64a95326697f942f5003c138c7ff3043ef5 (diff)
mm: speedup in __early_pfn_to_nid
When booting on a large memory system, the kernel spends considerable time in memmap_init_zone() setting up memory zones. Analysis shows significant time spent in __early_pfn_to_nid().

The routine memmap_init_zone() checks each PFN to verify the nid is valid. __early_pfn_to_nid() sequentially scans the list of pfn ranges to find the right range and returns the nid. This does not scale well. On a 4 TB (single rack) system there are 308 memory ranges to scan. The higher the PFN the more time spent sequentially spinning through memory ranges.

Since memmap_init_zone() increments pfn, it will almost always be looking for the same range as the previous pfn, so check that range first. If it is in the same range, return that nid. If not, scan the list as before.

A 4 TB (single rack) UV1 system takes 512 seconds to get through the zone code. This performance optimization reduces the time by 189 seconds, a 36% improvement.

A 2 TB (single rack) UV2 system goes from 212.7 seconds to 99.8 seconds, a 112.9 second (53%) reduction.

[akpm@linux-foundation.org: make the statics __meminitdata]
[akpm@linux-foundation.org: fix comment formatting]
[akpm@linux-foundation.org: fix ia64, per yinghai]
[akpm@linux-foundation.org: add missing semicolon, per Tony]

Signed-off-by: Russ Anderson <rja@sgi.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Tested-by: "Luck, Tony" <tony.luck@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Lin Feng <linfeng@cn.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  arch/ia64/mm/numa.c  15
-rw-r--r--  mm/page_alloc.c      15
2 files changed, 28 insertions, 2 deletions
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
index def782e31aac..4248492b9321 100644
--- a/arch/ia64/mm/numa.c
+++ b/arch/ia64/mm/numa.c
@@ -61,13 +61,26 @@ paddr_to_nid(unsigned long paddr)
61int __meminit __early_pfn_to_nid(unsigned long pfn) 61int __meminit __early_pfn_to_nid(unsigned long pfn)
62{ 62{
63 int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec; 63 int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec;
64 /*
65 * NOTE: The following SMP-unsafe globals are only used early in boot
66 * when the kernel is running single-threaded.
67 */
68 static int __meminitdata last_ssec, last_esec;
69 static int __meminitdata last_nid;
70
71 if (section >= last_ssec && section < last_esec)
72 return last_nid;
64 73
65 for (i = 0; i < num_node_memblks; i++) { 74 for (i = 0; i < num_node_memblks; i++) {
66 ssec = node_memblk[i].start_paddr >> PA_SECTION_SHIFT; 75 ssec = node_memblk[i].start_paddr >> PA_SECTION_SHIFT;
67 esec = (node_memblk[i].start_paddr + node_memblk[i].size + 76 esec = (node_memblk[i].start_paddr + node_memblk[i].size +
68 ((1L << PA_SECTION_SHIFT) - 1)) >> PA_SECTION_SHIFT; 77 ((1L << PA_SECTION_SHIFT) - 1)) >> PA_SECTION_SHIFT;
69 if (section >= ssec && section < esec) 78 if (section >= ssec && section < esec) {
79 last_ssec = ssec;
80 last_esec = esec;
81 last_nid = node_memblk[i].nid;
70 return node_memblk[i].nid; 82 return node_memblk[i].nid;
83 }
71 } 84 }
72 85
73 return -1; 86 return -1;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b54c5cbf0200..5a234b64f3ac 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4187,10 +4187,23 @@ int __meminit __early_pfn_to_nid(unsigned long pfn)
4187{ 4187{
4188 unsigned long start_pfn, end_pfn; 4188 unsigned long start_pfn, end_pfn;
4189 int i, nid; 4189 int i, nid;
4190 /*
4191 * NOTE: The following SMP-unsafe globals are only used early in boot
4192 * when the kernel is running single-threaded.
4193 */
4194 static unsigned long __meminitdata last_start_pfn, last_end_pfn;
4195 static int __meminitdata last_nid;
4196
4197 if (last_start_pfn <= pfn && pfn < last_end_pfn)
4198 return last_nid;
4190 4199
4191 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) 4200 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
4192 if (start_pfn <= pfn && pfn < end_pfn) 4201 if (start_pfn <= pfn && pfn < end_pfn) {
4202 last_start_pfn = start_pfn;
4203 last_end_pfn = end_pfn;
4204 last_nid = nid;
4193 return nid; 4205 return nid;
4206 }
4194 /* This is a memory hole */ 4207 /* This is a memory hole */
4195 return -1; 4208 return -1;
4196} 4209}