path: root/mm/page_alloc.c
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  233
1 file changed, 129 insertions(+), 104 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3b5358a0561f..9810f0a60db7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -51,7 +51,6 @@ EXPORT_SYMBOL(node_online_map);
 nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
 EXPORT_SYMBOL(node_possible_map);
 unsigned long totalram_pages __read_mostly;
-unsigned long totalhigh_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
 long nr_swap_pages;
 int percpu_pagelist_fraction;
@@ -69,7 +68,15 @@ static void __free_pages_ok(struct page *page, unsigned int order);
  * TBD: should special case ZONE_DMA32 machines here - in those we normally
  * don't need any ZONE_NORMAL reservation
  */
-int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 };
+int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
+	 256,
+#ifdef CONFIG_ZONE_DMA32
+	 256,
+#endif
+#ifdef CONFIG_HIGHMEM
+	 32
+#endif
+};
 
 EXPORT_SYMBOL(totalram_pages);
 
@@ -80,7 +87,17 @@ EXPORT_SYMBOL(totalram_pages);
 struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly;
 EXPORT_SYMBOL(zone_table);
 
-static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" };
+static char *zone_names[MAX_NR_ZONES] = {
+	 "DMA",
+#ifdef CONFIG_ZONE_DMA32
+	 "DMA32",
+#endif
+	 "Normal",
+#ifdef CONFIG_HIGHMEM
+	 "HighMem"
+#endif
+};
+
 int min_free_kbytes = 1024;
 
 unsigned long __meminitdata nr_kernel_pages;
@@ -127,7 +144,6 @@ static int bad_range(struct zone *zone, struct page *page)
 
 	return 0;
 }
-
 #else
 static inline int bad_range(struct zone *zone, struct page *page)
 {
@@ -218,12 +234,12 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
 {
 	int i;
 
-	BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
+	VM_BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
 	/*
 	 * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO
 	 * and __GFP_HIGHMEM from hard or soft interrupt context.
 	 */
-	BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt());
+	VM_BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt());
 	for (i = 0; i < (1 << order); i++)
 		clear_highpage(page + i);
 }
@@ -347,8 +363,8 @@ static inline void __free_one_page(struct page *page,
 
 	page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
 
-	BUG_ON(page_idx & (order_size - 1));
-	BUG_ON(bad_range(zone, page));
+	VM_BUG_ON(page_idx & (order_size - 1));
+	VM_BUG_ON(bad_range(zone, page));
 
 	zone->free_pages += order_size;
 	while (order < MAX_ORDER-1) {
@@ -421,7 +437,7 @@ static void free_pages_bulk(struct zone *zone, int count,
 	while (count--) {
 		struct page *page;
 
-		BUG_ON(list_empty(list));
+		VM_BUG_ON(list_empty(list));
 		page = list_entry(list->prev, struct page, lru);
 		/* have to delete it as __free_one_page list manipulates */
 		list_del(&page->lru);
@@ -432,9 +448,11 @@ static void free_pages_bulk(struct zone *zone, int count,
 
 static void free_one_page(struct zone *zone, struct page *page, int order)
 {
-	LIST_HEAD(list);
-	list_add(&page->lru, &list);
-	free_pages_bulk(zone, 1, &list, order);
+	spin_lock(&zone->lock);
+	zone->all_unreclaimable = 0;
+	zone->pages_scanned = 0;
+	__free_one_page(page, zone, order);
+	spin_unlock(&zone->lock);
 }
 
 static void __free_pages_ok(struct page *page, unsigned int order)
@@ -512,7 +530,7 @@ static inline void expand(struct zone *zone, struct page *page,
 		area--;
 		high--;
 		size >>= 1;
-		BUG_ON(bad_range(zone, &page[size]));
+		VM_BUG_ON(bad_range(zone, &page[size]));
 		list_add(&page[size].lru, &area->free_list);
 		area->nr_free++;
 		set_page_order(&page[size], high);
@@ -615,19 +633,23 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 #ifdef CONFIG_NUMA
 /*
  * Called from the slab reaper to drain pagesets on a particular node that
- * belong to the currently executing processor.
+ * belongs to the currently executing processor.
  * Note that this function must be called with the thread pinned to
  * a single processor.
  */
 void drain_node_pages(int nodeid)
 {
-	int i, z;
+	int i;
+	enum zone_type z;
 	unsigned long flags;
 
 	for (z = 0; z < MAX_NR_ZONES; z++) {
 		struct zone *zone = NODE_DATA(nodeid)->node_zones + z;
 		struct per_cpu_pageset *pset;
 
+		if (!populated_zone(zone))
+			continue;
+
 		pset = zone_pcp(zone, smp_processor_id());
 		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
 			struct per_cpu_pages *pcp;
@@ -672,7 +694,8 @@ static void __drain_pages(unsigned int cpu)
 
 void mark_free_pages(struct zone *zone)
 {
-	unsigned long zone_pfn, flags;
+	unsigned long pfn, max_zone_pfn;
+	unsigned long flags;
 	int order;
 	struct list_head *curr;
 
@@ -680,18 +703,25 @@ void mark_free_pages(struct zone *zone)
 		return;
 
 	spin_lock_irqsave(&zone->lock, flags);
-	for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
-		ClearPageNosaveFree(pfn_to_page(zone_pfn + zone->zone_start_pfn));
+
+	max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+	for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
+		if (pfn_valid(pfn)) {
+			struct page *page = pfn_to_page(pfn);
+
+			if (!PageNosave(page))
+				ClearPageNosaveFree(page);
+		}
 
 	for (order = MAX_ORDER - 1; order >= 0; --order)
 		list_for_each(curr, &zone->free_area[order].free_list) {
-			unsigned long start_pfn, i;
+			unsigned long i;
 
-			start_pfn = page_to_pfn(list_entry(curr, struct page, lru));
+			pfn = page_to_pfn(list_entry(curr, struct page, lru));
+			for (i = 0; i < (1UL << order); i++)
+				SetPageNosaveFree(pfn_to_page(pfn + i));
+		}
 
-			for (i=0; i < (1<<order); i++)
-				SetPageNosaveFree(pfn_to_page(start_pfn+i));
-		}
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
 
@@ -761,8 +791,8 @@ void split_page(struct page *page, unsigned int order)
 {
 	int i;
 
-	BUG_ON(PageCompound(page));
-	BUG_ON(!page_count(page));
+	VM_BUG_ON(PageCompound(page));
+	VM_BUG_ON(!page_count(page));
 	for (i = 1; i < (1 << order); i++)
 		set_page_refcounted(page + i);
 }
@@ -809,7 +839,7 @@ again:
 	local_irq_restore(flags);
 	put_cpu();
 
-	BUG_ON(bad_range(zone, page));
+	VM_BUG_ON(bad_range(zone, page));
 	if (prep_new_page(page, order, gfp_flags))
 		goto again;
 	return page;
@@ -870,32 +900,37 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 	struct zone **z = zonelist->zones;
 	struct page *page = NULL;
 	int classzone_idx = zone_idx(*z);
+	struct zone *zone;
 
 	/*
 	 * Go through the zonelist once, looking for a zone with enough free.
 	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 	 */
 	do {
+		zone = *z;
+		if (unlikely((gfp_mask & __GFP_THISNODE) &&
+			zone->zone_pgdat != zonelist->zones[0]->zone_pgdat))
+				break;
 		if ((alloc_flags & ALLOC_CPUSET) &&
-			!cpuset_zone_allowed(*z, gfp_mask))
+			!cpuset_zone_allowed(zone, gfp_mask))
 			continue;
 
 		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
 			unsigned long mark;
 			if (alloc_flags & ALLOC_WMARK_MIN)
-				mark = (*z)->pages_min;
+				mark = zone->pages_min;
 			else if (alloc_flags & ALLOC_WMARK_LOW)
-				mark = (*z)->pages_low;
+				mark = zone->pages_low;
 			else
-				mark = (*z)->pages_high;
-			if (!zone_watermark_ok(*z, order, mark,
+				mark = zone->pages_high;
+			if (!zone_watermark_ok(zone, order, mark,
 				    classzone_idx, alloc_flags))
 				if (!zone_reclaim_mode ||
-				    !zone_reclaim(*z, gfp_mask, order))
+				    !zone_reclaim(zone, gfp_mask, order))
 					continue;
 		}
 
-		page = buffered_rmqueue(zonelist, *z, order, gfp_mask);
+		page = buffered_rmqueue(zonelist, zone, order, gfp_mask);
 		if (page) {
 			break;
 		}
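
The __GFP_THISNODE test added above cuts the zonelist walk short as soon as it reaches a zone belonging to a different node than the first one. A minimal caller-side sketch of the semantics this enables (illustrative only, not part of this diff; alloc_pages_node() is the usual per-node allocation helper):

	/* Allocate one page strictly from node nid: with __GFP_THISNODE the
	 * walk above breaks out before any remote zone is tried, so failure
	 * means "no memory on that node", never a silent remote fallback. */
	struct page *page = alloc_pages_node(nid, GFP_KERNEL | __GFP_THISNODE, 0);

	if (!page)
		return -ENOMEM;
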
@@ -1083,7 +1118,7 @@ fastcall unsigned long get_zeroed_page(gfp_t gfp_mask)
 	 * get_zeroed_page() returns a 32-bit address, which cannot represent
 	 * a highmem page
 	 */
-	BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
+	VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
 
 	page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
 	if (page)
@@ -1116,7 +1151,7 @@ EXPORT_SYMBOL(__free_pages);
 fastcall void free_pages(unsigned long addr, unsigned int order)
 {
 	if (addr != 0) {
-		BUG_ON(!virt_addr_valid((void *)addr));
+		VM_BUG_ON(!virt_addr_valid((void *)addr));
 		__free_pages(virt_to_page((void *)addr), order);
 	}
 }
@@ -1142,7 +1177,8 @@ EXPORT_SYMBOL(nr_free_pages);
 #ifdef CONFIG_NUMA
 unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
 {
-	unsigned int i, sum = 0;
+	unsigned int sum = 0;
+	enum zone_type i;
 
 	for (i = 0; i < MAX_NR_ZONES; i++)
 		sum += pgdat->node_zones[i].free_pages;
@@ -1186,24 +1222,10 @@ unsigned int nr_free_pagecache_pages(void)
 {
 	return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER));
 }
-
-#ifdef CONFIG_HIGHMEM
-unsigned int nr_free_highpages (void)
-{
-	pg_data_t *pgdat;
-	unsigned int pages = 0;
-
-	for_each_online_pgdat(pgdat)
-		pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
-
-	return pages;
-}
-#endif
-
 #ifdef CONFIG_NUMA
 static void show_node(struct zone *zone)
 {
-	printk("Node %d ", zone->zone_pgdat->node_id);
+	printk("Node %ld ", zone_to_nid(zone));
 }
 #else
 #define show_node(zone) do { } while (0)
@@ -1215,13 +1237,8 @@ void si_meminfo(struct sysinfo *val)
 	val->sharedram = 0;
 	val->freeram = nr_free_pages();
 	val->bufferram = nr_blockdev_pages();
-#ifdef CONFIG_HIGHMEM
 	val->totalhigh = totalhigh_pages;
 	val->freehigh = nr_free_highpages();
-#else
-	val->totalhigh = 0;
-	val->freehigh = 0;
-#endif
 	val->mem_unit = PAGE_SIZE;
 }
 
@@ -1234,8 +1251,13 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 
 	val->totalram = pgdat->node_present_pages;
 	val->freeram = nr_free_pages_pgdat(pgdat);
+#ifdef CONFIG_HIGHMEM
 	val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
 	val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages;
+#else
+	val->totalhigh = 0;
+	val->freehigh = 0;
+#endif
 	val->mem_unit = PAGE_SIZE;
 }
 #endif
@@ -1282,10 +1304,6 @@ void show_free_areas(void)
 
 	get_zone_counts(&active, &inactive, &free);
 
-	printk("Free pages: %11ukB (%ukB HighMem)\n",
-		K(nr_free_pages()),
-		K(nr_free_highpages()));
-
 	printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu "
 		"unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n",
 		active,
@@ -1294,7 +1312,8 @@ void show_free_areas(void)
 		global_page_state(NR_WRITEBACK),
 		global_page_state(NR_UNSTABLE_NFS),
 		nr_free_pages(),
-		global_page_state(NR_SLAB),
+		global_page_state(NR_SLAB_RECLAIMABLE) +
+			global_page_state(NR_SLAB_UNRECLAIMABLE),
 		global_page_state(NR_FILE_MAPPED),
 		global_page_state(NR_PAGETABLE));
 
@@ -1360,39 +1379,25 @@ void show_free_areas(void)
  * Add all populated zones of a node to the zonelist.
  */
 static int __meminit build_zonelists_node(pg_data_t *pgdat,
-			struct zonelist *zonelist, int nr_zones, int zone_type)
+			struct zonelist *zonelist, int nr_zones, enum zone_type zone_type)
 {
 	struct zone *zone;
 
-	BUG_ON(zone_type > ZONE_HIGHMEM);
+	BUG_ON(zone_type >= MAX_NR_ZONES);
+	zone_type++;
 
 	do {
+		zone_type--;
 		zone = pgdat->node_zones + zone_type;
 		if (populated_zone(zone)) {
-#ifndef CONFIG_HIGHMEM
-			BUG_ON(zone_type > ZONE_NORMAL);
-#endif
 			zonelist->zones[nr_zones++] = zone;
 			check_highest_zone(zone_type);
 		}
-		zone_type--;
 
-	} while (zone_type >= 0);
+	} while (zone_type);
 	return nr_zones;
 }
 
-static inline int highest_zone(int zone_bits)
-{
-	int res = ZONE_NORMAL;
-	if (zone_bits & (__force int)__GFP_HIGHMEM)
-		res = ZONE_HIGHMEM;
-	if (zone_bits & (__force int)__GFP_DMA32)
-		res = ZONE_DMA32;
-	if (zone_bits & (__force int)__GFP_DMA)
-		res = ZONE_DMA;
-	return res;
-}
-
 #ifdef CONFIG_NUMA
 #define MAX_NODE_LOAD (num_online_nodes())
 static int __meminitdata node_load[MAX_NUMNODES];
@@ -1458,13 +1463,14 @@ static int __meminit find_next_best_node(int node, nodemask_t *used_node_mask)
 
 static void __meminit build_zonelists(pg_data_t *pgdat)
 {
-	int i, j, k, node, local_node;
+	int j, node, local_node;
+	enum zone_type i;
 	int prev_node, load;
 	struct zonelist *zonelist;
 	nodemask_t used_mask;
 
 	/* initialize zonelists */
-	for (i = 0; i < GFP_ZONETYPES; i++) {
+	for (i = 0; i < MAX_NR_ZONES; i++) {
 		zonelist = pgdat->node_zonelists + i;
 		zonelist->zones[0] = NULL;
 	}
@@ -1494,13 +1500,11 @@ static void __meminit build_zonelists(pg_data_t *pgdat)
 		node_load[node] += load;
 		prev_node = node;
 		load--;
-		for (i = 0; i < GFP_ZONETYPES; i++) {
+		for (i = 0; i < MAX_NR_ZONES; i++) {
 			zonelist = pgdat->node_zonelists + i;
 			for (j = 0; zonelist->zones[j] != NULL; j++);
 
-			k = highest_zone(i);
-
-			j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+			j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
 			zonelist->zones[j] = NULL;
 		}
 	}
@@ -1510,17 +1514,16 @@ static void __meminit build_zonelists(pg_data_t *pgdat)
 
 static void __meminit build_zonelists(pg_data_t *pgdat)
 {
-	int i, j, k, node, local_node;
+	int node, local_node;
+	enum zone_type i, j;
 
 	local_node = pgdat->node_id;
-	for (i = 0; i < GFP_ZONETYPES; i++) {
+	for (i = 0; i < MAX_NR_ZONES; i++) {
 		struct zonelist *zonelist;
 
 		zonelist = pgdat->node_zonelists + i;
 
-		j = 0;
-		k = highest_zone(i);
-		j = build_zonelists_node(pgdat, zonelist, j, k);
+		j = build_zonelists_node(pgdat, zonelist, 0, i);
 		/*
 		 * Now we build the zonelist so that it contains the zones
 		 * of all the other nodes.
@@ -1532,12 +1535,12 @@ static void __meminit build_zonelists(pg_data_t *pgdat)
 	for (node = local_node + 1; node < MAX_NUMNODES; node++) {
 		if (!node_online(node))
 			continue;
-		j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+		j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
 	}
 	for (node = 0; node < local_node; node++) {
 		if (!node_online(node))
 			continue;
-		j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+		j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
 	}
 
 	zonelist->zones[j] = NULL;
@@ -1643,7 +1646,7 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat,
 		unsigned long *zones_size, unsigned long *zholes_size)
 {
 	unsigned long realtotalpages, totalpages = 0;
-	int i;
+	enum zone_type i;
 
 	for (i = 0; i < MAX_NR_ZONES; i++)
 		totalpages += zones_size[i];
@@ -1698,8 +1701,8 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone,
 }
 
 #define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr)
-void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn,
-		unsigned long size)
+void zonetable_add(struct zone *zone, int nid, enum zone_type zid,
+		unsigned long pfn, unsigned long size)
 {
 	unsigned long snum = pfn_to_section_nr(pfn);
 	unsigned long end = pfn_to_section_nr(pfn + size);
@@ -1845,8 +1848,10 @@ static inline void free_zone_pagesets(int cpu)
 	for_each_zone(zone) {
 		struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
 
+		/* Free per_cpu_pageset if it is slab allocated */
+		if (pset != &boot_pageset[cpu])
+			kfree(pset);
 		zone_pcp(zone, cpu) = NULL;
-		kfree(pset);
 	}
 }
 
@@ -1981,7 +1986,7 @@ __meminit int init_currently_empty_zone(struct zone *zone,
 static void __meminit free_area_init_core(struct pglist_data *pgdat,
 		unsigned long *zones_size, unsigned long *zholes_size)
 {
-	unsigned long j;
+	enum zone_type j;
 	int nid = pgdat->node_id;
 	unsigned long zone_start_pfn = pgdat->node_start_pfn;
 	int ret;
@@ -1999,15 +2004,16 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
 		if (zholes_size)
 			realsize -= zholes_size[j];
 
-		if (j < ZONE_HIGHMEM)
+		if (!is_highmem_idx(j))
 			nr_kernel_pages += realsize;
 		nr_all_pages += realsize;
 
 		zone->spanned_pages = size;
 		zone->present_pages = realsize;
 #ifdef CONFIG_NUMA
-		zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio)
+		zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
 						/ 100;
+		zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
 #endif
 		zone->name = zone_names[j];
 		spin_lock_init(&zone->lock);
@@ -2129,7 +2135,7 @@ static void calculate_totalreserve_pages(void)
 {
 	struct pglist_data *pgdat;
 	unsigned long reserve_pages = 0;
-	int i, j;
+	enum zone_type i, j;
 
 	for_each_online_pgdat(pgdat) {
 		for (i = 0; i < MAX_NR_ZONES; i++) {
@@ -2162,7 +2168,7 @@ static void calculate_totalreserve_pages(void)
 static void setup_per_zone_lowmem_reserve(void)
 {
 	struct pglist_data *pgdat;
-	int j, idx;
+	enum zone_type j, idx;
 
 	for_each_online_pgdat(pgdat) {
 		for (j = 0; j < MAX_NR_ZONES; j++) {
@@ -2171,9 +2177,12 @@ static void setup_per_zone_lowmem_reserve(void)
 
 			zone->lowmem_reserve[j] = 0;
 
-			for (idx = j-1; idx >= 0; idx--) {
+			idx = j;
+			while (idx) {
 				struct zone *lower_zone;
 
+				idx--;
+
 				if (sysctl_lowmem_reserve_ratio[idx] < 1)
 					sysctl_lowmem_reserve_ratio[idx] = 1;
 
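
The rewritten loop computes the same reserves as before; counting down via "idx = j; while (idx) { idx--; ... }" merely avoids the "idx >= 0" test, which can never be false once idx is an unsigned enum zone_type. As a worked example of the ratio itself (illustrative, assuming the default ratios from the top of this diff and 4 KiB pages): with sysctl_lowmem_reserve_ratio[ZONE_NORMAL] = 32 and 1 GiB of ZONE_HIGHMEM (262144 pages) sitting above it, ZONE_NORMAL reserves 262144 / 32 = 8192 pages (32 MiB) that allocations able to use highmem may not consume.
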
@@ -2314,10 +2323,26 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
 		return rc;
 
 	for_each_zone(zone)
-		zone->min_unmapped_ratio = (zone->present_pages *
+		zone->min_unmapped_pages = (zone->present_pages *
 				sysctl_min_unmapped_ratio) / 100;
 	return 0;
 }
+
+int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
+	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+	struct zone *zone;
+	int rc;
+
+	rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	if (rc)
+		return rc;
+
+	for_each_zone(zone)
+		zone->min_slab_pages = (zone->present_pages *
+			sysctl_min_slab_ratio) / 100;
+	return 0;
+}
 #endif
 
 /*
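
Most of the assertion changes throughout this diff depend on VM_BUG_ON(), which in this era of the tree is defined in include/linux/mm.h roughly as follows (a sketch; the point is that the checks vanish unless CONFIG_DEBUG_VM is set, removing unconditional branches from the page allocator's hot paths):

	#ifdef CONFIG_DEBUG_VM
	#define VM_BUG_ON(cond) BUG_ON(cond)
	#else
	#define VM_BUG_ON(cond) do { } while (0)
	#endif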