diff options
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 233 |
1 files changed, 129 insertions, 104 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3b5358a0561f..9810f0a60db7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -51,7 +51,6 @@ EXPORT_SYMBOL(node_online_map); | |||
51 | nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL; | 51 | nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL; |
52 | EXPORT_SYMBOL(node_possible_map); | 52 | EXPORT_SYMBOL(node_possible_map); |
53 | unsigned long totalram_pages __read_mostly; | 53 | unsigned long totalram_pages __read_mostly; |
54 | unsigned long totalhigh_pages __read_mostly; | ||
55 | unsigned long totalreserve_pages __read_mostly; | 54 | unsigned long totalreserve_pages __read_mostly; |
56 | long nr_swap_pages; | 55 | long nr_swap_pages; |
57 | int percpu_pagelist_fraction; | 56 | int percpu_pagelist_fraction; |
@@ -69,7 +68,15 @@ static void __free_pages_ok(struct page *page, unsigned int order); | |||
69 | * TBD: should special case ZONE_DMA32 machines here - in those we normally | 68 | * TBD: should special case ZONE_DMA32 machines here - in those we normally |
70 | * don't need any ZONE_NORMAL reservation | 69 | * don't need any ZONE_NORMAL reservation |
71 | */ | 70 | */ |
72 | int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 }; | 71 | int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { |
72 | 256, | ||
73 | #ifdef CONFIG_ZONE_DMA32 | ||
74 | 256, | ||
75 | #endif | ||
76 | #ifdef CONFIG_HIGHMEM | ||
77 | 32 | ||
78 | #endif | ||
79 | }; | ||
73 | 80 | ||
74 | EXPORT_SYMBOL(totalram_pages); | 81 | EXPORT_SYMBOL(totalram_pages); |
75 | 82 | ||
@@ -80,7 +87,17 @@ EXPORT_SYMBOL(totalram_pages); | |||
80 | struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly; | 87 | struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly; |
81 | EXPORT_SYMBOL(zone_table); | 88 | EXPORT_SYMBOL(zone_table); |
82 | 89 | ||
83 | static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" }; | 90 | static char *zone_names[MAX_NR_ZONES] = { |
91 | "DMA", | ||
92 | #ifdef CONFIG_ZONE_DMA32 | ||
93 | "DMA32", | ||
94 | #endif | ||
95 | "Normal", | ||
96 | #ifdef CONFIG_HIGHMEM | ||
97 | "HighMem" | ||
98 | #endif | ||
99 | }; | ||
100 | |||
84 | int min_free_kbytes = 1024; | 101 | int min_free_kbytes = 1024; |
85 | 102 | ||
86 | unsigned long __meminitdata nr_kernel_pages; | 103 | unsigned long __meminitdata nr_kernel_pages; |
@@ -127,7 +144,6 @@ static int bad_range(struct zone *zone, struct page *page) | |||
127 | 144 | ||
128 | return 0; | 145 | return 0; |
129 | } | 146 | } |
130 | |||
131 | #else | 147 | #else |
132 | static inline int bad_range(struct zone *zone, struct page *page) | 148 | static inline int bad_range(struct zone *zone, struct page *page) |
133 | { | 149 | { |
@@ -218,12 +234,12 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) | |||
218 | { | 234 | { |
219 | int i; | 235 | int i; |
220 | 236 | ||
221 | BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM); | 237 | VM_BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM); |
222 | /* | 238 | /* |
223 | * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO | 239 | * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO |
224 | * and __GFP_HIGHMEM from hard or soft interrupt context. | 240 | * and __GFP_HIGHMEM from hard or soft interrupt context. |
225 | */ | 241 | */ |
226 | BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt()); | 242 | VM_BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt()); |
227 | for (i = 0; i < (1 << order); i++) | 243 | for (i = 0; i < (1 << order); i++) |
228 | clear_highpage(page + i); | 244 | clear_highpage(page + i); |
229 | } | 245 | } |
@@ -347,8 +363,8 @@ static inline void __free_one_page(struct page *page, | |||
347 | 363 | ||
348 | page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1); | 364 | page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1); |
349 | 365 | ||
350 | BUG_ON(page_idx & (order_size - 1)); | 366 | VM_BUG_ON(page_idx & (order_size - 1)); |
351 | BUG_ON(bad_range(zone, page)); | 367 | VM_BUG_ON(bad_range(zone, page)); |
352 | 368 | ||
353 | zone->free_pages += order_size; | 369 | zone->free_pages += order_size; |
354 | while (order < MAX_ORDER-1) { | 370 | while (order < MAX_ORDER-1) { |
@@ -421,7 +437,7 @@ static void free_pages_bulk(struct zone *zone, int count, | |||
421 | while (count--) { | 437 | while (count--) { |
422 | struct page *page; | 438 | struct page *page; |
423 | 439 | ||
424 | BUG_ON(list_empty(list)); | 440 | VM_BUG_ON(list_empty(list)); |
425 | page = list_entry(list->prev, struct page, lru); | 441 | page = list_entry(list->prev, struct page, lru); |
426 | /* have to delete it as __free_one_page list manipulates */ | 442 | /* have to delete it as __free_one_page list manipulates */ |
427 | list_del(&page->lru); | 443 | list_del(&page->lru); |
@@ -432,9 +448,11 @@ static void free_pages_bulk(struct zone *zone, int count, | |||
432 | 448 | ||
433 | static void free_one_page(struct zone *zone, struct page *page, int order) | 449 | static void free_one_page(struct zone *zone, struct page *page, int order) |
434 | { | 450 | { |
435 | LIST_HEAD(list); | 451 | spin_lock(&zone->lock); |
436 | list_add(&page->lru, &list); | 452 | zone->all_unreclaimable = 0; |
437 | free_pages_bulk(zone, 1, &list, order); | 453 | zone->pages_scanned = 0; |
454 | __free_one_page(page, zone ,order); | ||
455 | spin_unlock(&zone->lock); | ||
438 | } | 456 | } |
439 | 457 | ||
440 | static void __free_pages_ok(struct page *page, unsigned int order) | 458 | static void __free_pages_ok(struct page *page, unsigned int order) |
@@ -512,7 +530,7 @@ static inline void expand(struct zone *zone, struct page *page, | |||
512 | area--; | 530 | area--; |
513 | high--; | 531 | high--; |
514 | size >>= 1; | 532 | size >>= 1; |
515 | BUG_ON(bad_range(zone, &page[size])); | 533 | VM_BUG_ON(bad_range(zone, &page[size])); |
516 | list_add(&page[size].lru, &area->free_list); | 534 | list_add(&page[size].lru, &area->free_list); |
517 | area->nr_free++; | 535 | area->nr_free++; |
518 | set_page_order(&page[size], high); | 536 | set_page_order(&page[size], high); |
@@ -615,19 +633,23 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |||
615 | #ifdef CONFIG_NUMA | 633 | #ifdef CONFIG_NUMA |
616 | /* | 634 | /* |
617 | * Called from the slab reaper to drain pagesets on a particular node that | 635 | * Called from the slab reaper to drain pagesets on a particular node that |
618 | * belong to the currently executing processor. | 636 | * belongs to the currently executing processor. |
619 | * Note that this function must be called with the thread pinned to | 637 | * Note that this function must be called with the thread pinned to |
620 | * a single processor. | 638 | * a single processor. |
621 | */ | 639 | */ |
622 | void drain_node_pages(int nodeid) | 640 | void drain_node_pages(int nodeid) |
623 | { | 641 | { |
624 | int i, z; | 642 | int i; |
643 | enum zone_type z; | ||
625 | unsigned long flags; | 644 | unsigned long flags; |
626 | 645 | ||
627 | for (z = 0; z < MAX_NR_ZONES; z++) { | 646 | for (z = 0; z < MAX_NR_ZONES; z++) { |
628 | struct zone *zone = NODE_DATA(nodeid)->node_zones + z; | 647 | struct zone *zone = NODE_DATA(nodeid)->node_zones + z; |
629 | struct per_cpu_pageset *pset; | 648 | struct per_cpu_pageset *pset; |
630 | 649 | ||
650 | if (!populated_zone(zone)) | ||
651 | continue; | ||
652 | |||
631 | pset = zone_pcp(zone, smp_processor_id()); | 653 | pset = zone_pcp(zone, smp_processor_id()); |
632 | for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) { | 654 | for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) { |
633 | struct per_cpu_pages *pcp; | 655 | struct per_cpu_pages *pcp; |
@@ -672,7 +694,8 @@ static void __drain_pages(unsigned int cpu) | |||
672 | 694 | ||
673 | void mark_free_pages(struct zone *zone) | 695 | void mark_free_pages(struct zone *zone) |
674 | { | 696 | { |
675 | unsigned long zone_pfn, flags; | 697 | unsigned long pfn, max_zone_pfn; |
698 | unsigned long flags; | ||
676 | int order; | 699 | int order; |
677 | struct list_head *curr; | 700 | struct list_head *curr; |
678 | 701 | ||
@@ -680,18 +703,25 @@ void mark_free_pages(struct zone *zone) | |||
680 | return; | 703 | return; |
681 | 704 | ||
682 | spin_lock_irqsave(&zone->lock, flags); | 705 | spin_lock_irqsave(&zone->lock, flags); |
683 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) | 706 | |
684 | ClearPageNosaveFree(pfn_to_page(zone_pfn + zone->zone_start_pfn)); | 707 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; |
708 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | ||
709 | if (pfn_valid(pfn)) { | ||
710 | struct page *page = pfn_to_page(pfn); | ||
711 | |||
712 | if (!PageNosave(page)) | ||
713 | ClearPageNosaveFree(page); | ||
714 | } | ||
685 | 715 | ||
686 | for (order = MAX_ORDER - 1; order >= 0; --order) | 716 | for (order = MAX_ORDER - 1; order >= 0; --order) |
687 | list_for_each(curr, &zone->free_area[order].free_list) { | 717 | list_for_each(curr, &zone->free_area[order].free_list) { |
688 | unsigned long start_pfn, i; | 718 | unsigned long i; |
689 | 719 | ||
690 | start_pfn = page_to_pfn(list_entry(curr, struct page, lru)); | 720 | pfn = page_to_pfn(list_entry(curr, struct page, lru)); |
721 | for (i = 0; i < (1UL << order); i++) | ||
722 | SetPageNosaveFree(pfn_to_page(pfn + i)); | ||
723 | } | ||
691 | 724 | ||
692 | for (i=0; i < (1<<order); i++) | ||
693 | SetPageNosaveFree(pfn_to_page(start_pfn+i)); | ||
694 | } | ||
695 | spin_unlock_irqrestore(&zone->lock, flags); | 725 | spin_unlock_irqrestore(&zone->lock, flags); |
696 | } | 726 | } |
697 | 727 | ||
@@ -761,8 +791,8 @@ void split_page(struct page *page, unsigned int order) | |||
761 | { | 791 | { |
762 | int i; | 792 | int i; |
763 | 793 | ||
764 | BUG_ON(PageCompound(page)); | 794 | VM_BUG_ON(PageCompound(page)); |
765 | BUG_ON(!page_count(page)); | 795 | VM_BUG_ON(!page_count(page)); |
766 | for (i = 1; i < (1 << order); i++) | 796 | for (i = 1; i < (1 << order); i++) |
767 | set_page_refcounted(page + i); | 797 | set_page_refcounted(page + i); |
768 | } | 798 | } |
@@ -809,7 +839,7 @@ again: | |||
809 | local_irq_restore(flags); | 839 | local_irq_restore(flags); |
810 | put_cpu(); | 840 | put_cpu(); |
811 | 841 | ||
812 | BUG_ON(bad_range(zone, page)); | 842 | VM_BUG_ON(bad_range(zone, page)); |
813 | if (prep_new_page(page, order, gfp_flags)) | 843 | if (prep_new_page(page, order, gfp_flags)) |
814 | goto again; | 844 | goto again; |
815 | return page; | 845 | return page; |
@@ -870,32 +900,37 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, | |||
870 | struct zone **z = zonelist->zones; | 900 | struct zone **z = zonelist->zones; |
871 | struct page *page = NULL; | 901 | struct page *page = NULL; |
872 | int classzone_idx = zone_idx(*z); | 902 | int classzone_idx = zone_idx(*z); |
903 | struct zone *zone; | ||
873 | 904 | ||
874 | /* | 905 | /* |
875 | * Go through the zonelist once, looking for a zone with enough free. | 906 | * Go through the zonelist once, looking for a zone with enough free. |
876 | * See also cpuset_zone_allowed() comment in kernel/cpuset.c. | 907 | * See also cpuset_zone_allowed() comment in kernel/cpuset.c. |
877 | */ | 908 | */ |
878 | do { | 909 | do { |
910 | zone = *z; | ||
911 | if (unlikely((gfp_mask & __GFP_THISNODE) && | ||
912 | zone->zone_pgdat != zonelist->zones[0]->zone_pgdat)) | ||
913 | break; | ||
879 | if ((alloc_flags & ALLOC_CPUSET) && | 914 | if ((alloc_flags & ALLOC_CPUSET) && |
880 | !cpuset_zone_allowed(*z, gfp_mask)) | 915 | !cpuset_zone_allowed(zone, gfp_mask)) |
881 | continue; | 916 | continue; |
882 | 917 | ||
883 | if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { | 918 | if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { |
884 | unsigned long mark; | 919 | unsigned long mark; |
885 | if (alloc_flags & ALLOC_WMARK_MIN) | 920 | if (alloc_flags & ALLOC_WMARK_MIN) |
886 | mark = (*z)->pages_min; | 921 | mark = zone->pages_min; |
887 | else if (alloc_flags & ALLOC_WMARK_LOW) | 922 | else if (alloc_flags & ALLOC_WMARK_LOW) |
888 | mark = (*z)->pages_low; | 923 | mark = zone->pages_low; |
889 | else | 924 | else |
890 | mark = (*z)->pages_high; | 925 | mark = zone->pages_high; |
891 | if (!zone_watermark_ok(*z, order, mark, | 926 | if (!zone_watermark_ok(zone , order, mark, |
892 | classzone_idx, alloc_flags)) | 927 | classzone_idx, alloc_flags)) |
893 | if (!zone_reclaim_mode || | 928 | if (!zone_reclaim_mode || |
894 | !zone_reclaim(*z, gfp_mask, order)) | 929 | !zone_reclaim(zone, gfp_mask, order)) |
895 | continue; | 930 | continue; |
896 | } | 931 | } |
897 | 932 | ||
898 | page = buffered_rmqueue(zonelist, *z, order, gfp_mask); | 933 | page = buffered_rmqueue(zonelist, zone, order, gfp_mask); |
899 | if (page) { | 934 | if (page) { |
900 | break; | 935 | break; |
901 | } | 936 | } |
@@ -1083,7 +1118,7 @@ fastcall unsigned long get_zeroed_page(gfp_t gfp_mask) | |||
1083 | * get_zeroed_page() returns a 32-bit address, which cannot represent | 1118 | * get_zeroed_page() returns a 32-bit address, which cannot represent |
1084 | * a highmem page | 1119 | * a highmem page |
1085 | */ | 1120 | */ |
1086 | BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); | 1121 | VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); |
1087 | 1122 | ||
1088 | page = alloc_pages(gfp_mask | __GFP_ZERO, 0); | 1123 | page = alloc_pages(gfp_mask | __GFP_ZERO, 0); |
1089 | if (page) | 1124 | if (page) |
@@ -1116,7 +1151,7 @@ EXPORT_SYMBOL(__free_pages); | |||
1116 | fastcall void free_pages(unsigned long addr, unsigned int order) | 1151 | fastcall void free_pages(unsigned long addr, unsigned int order) |
1117 | { | 1152 | { |
1118 | if (addr != 0) { | 1153 | if (addr != 0) { |
1119 | BUG_ON(!virt_addr_valid((void *)addr)); | 1154 | VM_BUG_ON(!virt_addr_valid((void *)addr)); |
1120 | __free_pages(virt_to_page((void *)addr), order); | 1155 | __free_pages(virt_to_page((void *)addr), order); |
1121 | } | 1156 | } |
1122 | } | 1157 | } |
@@ -1142,7 +1177,8 @@ EXPORT_SYMBOL(nr_free_pages); | |||
1142 | #ifdef CONFIG_NUMA | 1177 | #ifdef CONFIG_NUMA |
1143 | unsigned int nr_free_pages_pgdat(pg_data_t *pgdat) | 1178 | unsigned int nr_free_pages_pgdat(pg_data_t *pgdat) |
1144 | { | 1179 | { |
1145 | unsigned int i, sum = 0; | 1180 | unsigned int sum = 0; |
1181 | enum zone_type i; | ||
1146 | 1182 | ||
1147 | for (i = 0; i < MAX_NR_ZONES; i++) | 1183 | for (i = 0; i < MAX_NR_ZONES; i++) |
1148 | sum += pgdat->node_zones[i].free_pages; | 1184 | sum += pgdat->node_zones[i].free_pages; |
@@ -1186,24 +1222,10 @@ unsigned int nr_free_pagecache_pages(void) | |||
1186 | { | 1222 | { |
1187 | return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER)); | 1223 | return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER)); |
1188 | } | 1224 | } |
1189 | |||
1190 | #ifdef CONFIG_HIGHMEM | ||
1191 | unsigned int nr_free_highpages (void) | ||
1192 | { | ||
1193 | pg_data_t *pgdat; | ||
1194 | unsigned int pages = 0; | ||
1195 | |||
1196 | for_each_online_pgdat(pgdat) | ||
1197 | pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; | ||
1198 | |||
1199 | return pages; | ||
1200 | } | ||
1201 | #endif | ||
1202 | |||
1203 | #ifdef CONFIG_NUMA | 1225 | #ifdef CONFIG_NUMA |
1204 | static void show_node(struct zone *zone) | 1226 | static void show_node(struct zone *zone) |
1205 | { | 1227 | { |
1206 | printk("Node %d ", zone->zone_pgdat->node_id); | 1228 | printk("Node %ld ", zone_to_nid(zone)); |
1207 | } | 1229 | } |
1208 | #else | 1230 | #else |
1209 | #define show_node(zone) do { } while (0) | 1231 | #define show_node(zone) do { } while (0) |
@@ -1215,13 +1237,8 @@ void si_meminfo(struct sysinfo *val) | |||
1215 | val->sharedram = 0; | 1237 | val->sharedram = 0; |
1216 | val->freeram = nr_free_pages(); | 1238 | val->freeram = nr_free_pages(); |
1217 | val->bufferram = nr_blockdev_pages(); | 1239 | val->bufferram = nr_blockdev_pages(); |
1218 | #ifdef CONFIG_HIGHMEM | ||
1219 | val->totalhigh = totalhigh_pages; | 1240 | val->totalhigh = totalhigh_pages; |
1220 | val->freehigh = nr_free_highpages(); | 1241 | val->freehigh = nr_free_highpages(); |
1221 | #else | ||
1222 | val->totalhigh = 0; | ||
1223 | val->freehigh = 0; | ||
1224 | #endif | ||
1225 | val->mem_unit = PAGE_SIZE; | 1242 | val->mem_unit = PAGE_SIZE; |
1226 | } | 1243 | } |
1227 | 1244 | ||
@@ -1234,8 +1251,13 @@ void si_meminfo_node(struct sysinfo *val, int nid) | |||
1234 | 1251 | ||
1235 | val->totalram = pgdat->node_present_pages; | 1252 | val->totalram = pgdat->node_present_pages; |
1236 | val->freeram = nr_free_pages_pgdat(pgdat); | 1253 | val->freeram = nr_free_pages_pgdat(pgdat); |
1254 | #ifdef CONFIG_HIGHMEM | ||
1237 | val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages; | 1255 | val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages; |
1238 | val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages; | 1256 | val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages; |
1257 | #else | ||
1258 | val->totalhigh = 0; | ||
1259 | val->freehigh = 0; | ||
1260 | #endif | ||
1239 | val->mem_unit = PAGE_SIZE; | 1261 | val->mem_unit = PAGE_SIZE; |
1240 | } | 1262 | } |
1241 | #endif | 1263 | #endif |
@@ -1282,10 +1304,6 @@ void show_free_areas(void) | |||
1282 | 1304 | ||
1283 | get_zone_counts(&active, &inactive, &free); | 1305 | get_zone_counts(&active, &inactive, &free); |
1284 | 1306 | ||
1285 | printk("Free pages: %11ukB (%ukB HighMem)\n", | ||
1286 | K(nr_free_pages()), | ||
1287 | K(nr_free_highpages())); | ||
1288 | |||
1289 | printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu " | 1307 | printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu " |
1290 | "unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n", | 1308 | "unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n", |
1291 | active, | 1309 | active, |
@@ -1294,7 +1312,8 @@ void show_free_areas(void) | |||
1294 | global_page_state(NR_WRITEBACK), | 1312 | global_page_state(NR_WRITEBACK), |
1295 | global_page_state(NR_UNSTABLE_NFS), | 1313 | global_page_state(NR_UNSTABLE_NFS), |
1296 | nr_free_pages(), | 1314 | nr_free_pages(), |
1297 | global_page_state(NR_SLAB), | 1315 | global_page_state(NR_SLAB_RECLAIMABLE) + |
1316 | global_page_state(NR_SLAB_UNRECLAIMABLE), | ||
1298 | global_page_state(NR_FILE_MAPPED), | 1317 | global_page_state(NR_FILE_MAPPED), |
1299 | global_page_state(NR_PAGETABLE)); | 1318 | global_page_state(NR_PAGETABLE)); |
1300 | 1319 | ||
@@ -1360,39 +1379,25 @@ void show_free_areas(void) | |||
1360 | * Add all populated zones of a node to the zonelist. | 1379 | * Add all populated zones of a node to the zonelist. |
1361 | */ | 1380 | */ |
1362 | static int __meminit build_zonelists_node(pg_data_t *pgdat, | 1381 | static int __meminit build_zonelists_node(pg_data_t *pgdat, |
1363 | struct zonelist *zonelist, int nr_zones, int zone_type) | 1382 | struct zonelist *zonelist, int nr_zones, enum zone_type zone_type) |
1364 | { | 1383 | { |
1365 | struct zone *zone; | 1384 | struct zone *zone; |
1366 | 1385 | ||
1367 | BUG_ON(zone_type > ZONE_HIGHMEM); | 1386 | BUG_ON(zone_type >= MAX_NR_ZONES); |
1387 | zone_type++; | ||
1368 | 1388 | ||
1369 | do { | 1389 | do { |
1390 | zone_type--; | ||
1370 | zone = pgdat->node_zones + zone_type; | 1391 | zone = pgdat->node_zones + zone_type; |
1371 | if (populated_zone(zone)) { | 1392 | if (populated_zone(zone)) { |
1372 | #ifndef CONFIG_HIGHMEM | ||
1373 | BUG_ON(zone_type > ZONE_NORMAL); | ||
1374 | #endif | ||
1375 | zonelist->zones[nr_zones++] = zone; | 1393 | zonelist->zones[nr_zones++] = zone; |
1376 | check_highest_zone(zone_type); | 1394 | check_highest_zone(zone_type); |
1377 | } | 1395 | } |
1378 | zone_type--; | ||
1379 | 1396 | ||
1380 | } while (zone_type >= 0); | 1397 | } while (zone_type); |
1381 | return nr_zones; | 1398 | return nr_zones; |
1382 | } | 1399 | } |
1383 | 1400 | ||
1384 | static inline int highest_zone(int zone_bits) | ||
1385 | { | ||
1386 | int res = ZONE_NORMAL; | ||
1387 | if (zone_bits & (__force int)__GFP_HIGHMEM) | ||
1388 | res = ZONE_HIGHMEM; | ||
1389 | if (zone_bits & (__force int)__GFP_DMA32) | ||
1390 | res = ZONE_DMA32; | ||
1391 | if (zone_bits & (__force int)__GFP_DMA) | ||
1392 | res = ZONE_DMA; | ||
1393 | return res; | ||
1394 | } | ||
1395 | |||
1396 | #ifdef CONFIG_NUMA | 1401 | #ifdef CONFIG_NUMA |
1397 | #define MAX_NODE_LOAD (num_online_nodes()) | 1402 | #define MAX_NODE_LOAD (num_online_nodes()) |
1398 | static int __meminitdata node_load[MAX_NUMNODES]; | 1403 | static int __meminitdata node_load[MAX_NUMNODES]; |
@@ -1458,13 +1463,14 @@ static int __meminit find_next_best_node(int node, nodemask_t *used_node_mask) | |||
1458 | 1463 | ||
1459 | static void __meminit build_zonelists(pg_data_t *pgdat) | 1464 | static void __meminit build_zonelists(pg_data_t *pgdat) |
1460 | { | 1465 | { |
1461 | int i, j, k, node, local_node; | 1466 | int j, node, local_node; |
1467 | enum zone_type i; | ||
1462 | int prev_node, load; | 1468 | int prev_node, load; |
1463 | struct zonelist *zonelist; | 1469 | struct zonelist *zonelist; |
1464 | nodemask_t used_mask; | 1470 | nodemask_t used_mask; |
1465 | 1471 | ||
1466 | /* initialize zonelists */ | 1472 | /* initialize zonelists */ |
1467 | for (i = 0; i < GFP_ZONETYPES; i++) { | 1473 | for (i = 0; i < MAX_NR_ZONES; i++) { |
1468 | zonelist = pgdat->node_zonelists + i; | 1474 | zonelist = pgdat->node_zonelists + i; |
1469 | zonelist->zones[0] = NULL; | 1475 | zonelist->zones[0] = NULL; |
1470 | } | 1476 | } |
@@ -1494,13 +1500,11 @@ static void __meminit build_zonelists(pg_data_t *pgdat) | |||
1494 | node_load[node] += load; | 1500 | node_load[node] += load; |
1495 | prev_node = node; | 1501 | prev_node = node; |
1496 | load--; | 1502 | load--; |
1497 | for (i = 0; i < GFP_ZONETYPES; i++) { | 1503 | for (i = 0; i < MAX_NR_ZONES; i++) { |
1498 | zonelist = pgdat->node_zonelists + i; | 1504 | zonelist = pgdat->node_zonelists + i; |
1499 | for (j = 0; zonelist->zones[j] != NULL; j++); | 1505 | for (j = 0; zonelist->zones[j] != NULL; j++); |
1500 | 1506 | ||
1501 | k = highest_zone(i); | 1507 | j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); |
1502 | |||
1503 | j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); | ||
1504 | zonelist->zones[j] = NULL; | 1508 | zonelist->zones[j] = NULL; |
1505 | } | 1509 | } |
1506 | } | 1510 | } |
@@ -1510,17 +1514,16 @@ static void __meminit build_zonelists(pg_data_t *pgdat) | |||
1510 | 1514 | ||
1511 | static void __meminit build_zonelists(pg_data_t *pgdat) | 1515 | static void __meminit build_zonelists(pg_data_t *pgdat) |
1512 | { | 1516 | { |
1513 | int i, j, k, node, local_node; | 1517 | int node, local_node; |
1518 | enum zone_type i,j; | ||
1514 | 1519 | ||
1515 | local_node = pgdat->node_id; | 1520 | local_node = pgdat->node_id; |
1516 | for (i = 0; i < GFP_ZONETYPES; i++) { | 1521 | for (i = 0; i < MAX_NR_ZONES; i++) { |
1517 | struct zonelist *zonelist; | 1522 | struct zonelist *zonelist; |
1518 | 1523 | ||
1519 | zonelist = pgdat->node_zonelists + i; | 1524 | zonelist = pgdat->node_zonelists + i; |
1520 | 1525 | ||
1521 | j = 0; | 1526 | j = build_zonelists_node(pgdat, zonelist, 0, i); |
1522 | k = highest_zone(i); | ||
1523 | j = build_zonelists_node(pgdat, zonelist, j, k); | ||
1524 | /* | 1527 | /* |
1525 | * Now we build the zonelist so that it contains the zones | 1528 | * Now we build the zonelist so that it contains the zones |
1526 | * of all the other nodes. | 1529 | * of all the other nodes. |
@@ -1532,12 +1535,12 @@ static void __meminit build_zonelists(pg_data_t *pgdat) | |||
1532 | for (node = local_node + 1; node < MAX_NUMNODES; node++) { | 1535 | for (node = local_node + 1; node < MAX_NUMNODES; node++) { |
1533 | if (!node_online(node)) | 1536 | if (!node_online(node)) |
1534 | continue; | 1537 | continue; |
1535 | j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); | 1538 | j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); |
1536 | } | 1539 | } |
1537 | for (node = 0; node < local_node; node++) { | 1540 | for (node = 0; node < local_node; node++) { |
1538 | if (!node_online(node)) | 1541 | if (!node_online(node)) |
1539 | continue; | 1542 | continue; |
1540 | j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); | 1543 | j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); |
1541 | } | 1544 | } |
1542 | 1545 | ||
1543 | zonelist->zones[j] = NULL; | 1546 | zonelist->zones[j] = NULL; |
@@ -1643,7 +1646,7 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat, | |||
1643 | unsigned long *zones_size, unsigned long *zholes_size) | 1646 | unsigned long *zones_size, unsigned long *zholes_size) |
1644 | { | 1647 | { |
1645 | unsigned long realtotalpages, totalpages = 0; | 1648 | unsigned long realtotalpages, totalpages = 0; |
1646 | int i; | 1649 | enum zone_type i; |
1647 | 1650 | ||
1648 | for (i = 0; i < MAX_NR_ZONES; i++) | 1651 | for (i = 0; i < MAX_NR_ZONES; i++) |
1649 | totalpages += zones_size[i]; | 1652 | totalpages += zones_size[i]; |
@@ -1698,8 +1701,8 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, | |||
1698 | } | 1701 | } |
1699 | 1702 | ||
1700 | #define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr) | 1703 | #define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr) |
1701 | void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn, | 1704 | void zonetable_add(struct zone *zone, int nid, enum zone_type zid, |
1702 | unsigned long size) | 1705 | unsigned long pfn, unsigned long size) |
1703 | { | 1706 | { |
1704 | unsigned long snum = pfn_to_section_nr(pfn); | 1707 | unsigned long snum = pfn_to_section_nr(pfn); |
1705 | unsigned long end = pfn_to_section_nr(pfn + size); | 1708 | unsigned long end = pfn_to_section_nr(pfn + size); |
@@ -1845,8 +1848,10 @@ static inline void free_zone_pagesets(int cpu) | |||
1845 | for_each_zone(zone) { | 1848 | for_each_zone(zone) { |
1846 | struct per_cpu_pageset *pset = zone_pcp(zone, cpu); | 1849 | struct per_cpu_pageset *pset = zone_pcp(zone, cpu); |
1847 | 1850 | ||
1851 | /* Free per_cpu_pageset if it is slab allocated */ | ||
1852 | if (pset != &boot_pageset[cpu]) | ||
1853 | kfree(pset); | ||
1848 | zone_pcp(zone, cpu) = NULL; | 1854 | zone_pcp(zone, cpu) = NULL; |
1849 | kfree(pset); | ||
1850 | } | 1855 | } |
1851 | } | 1856 | } |
1852 | 1857 | ||
@@ -1981,7 +1986,7 @@ __meminit int init_currently_empty_zone(struct zone *zone, | |||
1981 | static void __meminit free_area_init_core(struct pglist_data *pgdat, | 1986 | static void __meminit free_area_init_core(struct pglist_data *pgdat, |
1982 | unsigned long *zones_size, unsigned long *zholes_size) | 1987 | unsigned long *zones_size, unsigned long *zholes_size) |
1983 | { | 1988 | { |
1984 | unsigned long j; | 1989 | enum zone_type j; |
1985 | int nid = pgdat->node_id; | 1990 | int nid = pgdat->node_id; |
1986 | unsigned long zone_start_pfn = pgdat->node_start_pfn; | 1991 | unsigned long zone_start_pfn = pgdat->node_start_pfn; |
1987 | int ret; | 1992 | int ret; |
@@ -1999,15 +2004,16 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, | |||
1999 | if (zholes_size) | 2004 | if (zholes_size) |
2000 | realsize -= zholes_size[j]; | 2005 | realsize -= zholes_size[j]; |
2001 | 2006 | ||
2002 | if (j < ZONE_HIGHMEM) | 2007 | if (!is_highmem_idx(j)) |
2003 | nr_kernel_pages += realsize; | 2008 | nr_kernel_pages += realsize; |
2004 | nr_all_pages += realsize; | 2009 | nr_all_pages += realsize; |
2005 | 2010 | ||
2006 | zone->spanned_pages = size; | 2011 | zone->spanned_pages = size; |
2007 | zone->present_pages = realsize; | 2012 | zone->present_pages = realsize; |
2008 | #ifdef CONFIG_NUMA | 2013 | #ifdef CONFIG_NUMA |
2009 | zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio) | 2014 | zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio) |
2010 | / 100; | 2015 | / 100; |
2016 | zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100; | ||
2011 | #endif | 2017 | #endif |
2012 | zone->name = zone_names[j]; | 2018 | zone->name = zone_names[j]; |
2013 | spin_lock_init(&zone->lock); | 2019 | spin_lock_init(&zone->lock); |
@@ -2129,7 +2135,7 @@ static void calculate_totalreserve_pages(void) | |||
2129 | { | 2135 | { |
2130 | struct pglist_data *pgdat; | 2136 | struct pglist_data *pgdat; |
2131 | unsigned long reserve_pages = 0; | 2137 | unsigned long reserve_pages = 0; |
2132 | int i, j; | 2138 | enum zone_type i, j; |
2133 | 2139 | ||
2134 | for_each_online_pgdat(pgdat) { | 2140 | for_each_online_pgdat(pgdat) { |
2135 | for (i = 0; i < MAX_NR_ZONES; i++) { | 2141 | for (i = 0; i < MAX_NR_ZONES; i++) { |
@@ -2162,7 +2168,7 @@ static void calculate_totalreserve_pages(void) | |||
2162 | static void setup_per_zone_lowmem_reserve(void) | 2168 | static void setup_per_zone_lowmem_reserve(void) |
2163 | { | 2169 | { |
2164 | struct pglist_data *pgdat; | 2170 | struct pglist_data *pgdat; |
2165 | int j, idx; | 2171 | enum zone_type j, idx; |
2166 | 2172 | ||
2167 | for_each_online_pgdat(pgdat) { | 2173 | for_each_online_pgdat(pgdat) { |
2168 | for (j = 0; j < MAX_NR_ZONES; j++) { | 2174 | for (j = 0; j < MAX_NR_ZONES; j++) { |
@@ -2171,9 +2177,12 @@ static void setup_per_zone_lowmem_reserve(void) | |||
2171 | 2177 | ||
2172 | zone->lowmem_reserve[j] = 0; | 2178 | zone->lowmem_reserve[j] = 0; |
2173 | 2179 | ||
2174 | for (idx = j-1; idx >= 0; idx--) { | 2180 | idx = j; |
2181 | while (idx) { | ||
2175 | struct zone *lower_zone; | 2182 | struct zone *lower_zone; |
2176 | 2183 | ||
2184 | idx--; | ||
2185 | |||
2177 | if (sysctl_lowmem_reserve_ratio[idx] < 1) | 2186 | if (sysctl_lowmem_reserve_ratio[idx] < 1) |
2178 | sysctl_lowmem_reserve_ratio[idx] = 1; | 2187 | sysctl_lowmem_reserve_ratio[idx] = 1; |
2179 | 2188 | ||
@@ -2314,10 +2323,26 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, | |||
2314 | return rc; | 2323 | return rc; |
2315 | 2324 | ||
2316 | for_each_zone(zone) | 2325 | for_each_zone(zone) |
2317 | zone->min_unmapped_ratio = (zone->present_pages * | 2326 | zone->min_unmapped_pages = (zone->present_pages * |
2318 | sysctl_min_unmapped_ratio) / 100; | 2327 | sysctl_min_unmapped_ratio) / 100; |
2319 | return 0; | 2328 | return 0; |
2320 | } | 2329 | } |
2330 | |||
2331 | int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, | ||
2332 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | ||
2333 | { | ||
2334 | struct zone *zone; | ||
2335 | int rc; | ||
2336 | |||
2337 | rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); | ||
2338 | if (rc) | ||
2339 | return rc; | ||
2340 | |||
2341 | for_each_zone(zone) | ||
2342 | zone->min_slab_pages = (zone->present_pages * | ||
2343 | sysctl_min_slab_ratio) / 100; | ||
2344 | return 0; | ||
2345 | } | ||
2321 | #endif | 2346 | #endif |
2322 | 2347 | ||
2323 | /* | 2348 | /* |