Diffstat (limited to 'mm/page_alloc.c')
 -rw-r--r--  mm/page_alloc.c  136
 1 file changed, 103 insertions(+), 33 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e293c58bea58..d0a240fbb8bf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -44,7 +44,7 @@
 #include <linux/backing-dev.h>
 #include <linux/fault-inject.h>
 #include <linux/page-isolation.h>
-#include <linux/memcontrol.h>
+#include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 
 #include <asm/tlbflush.h>
@@ -223,17 +223,12 @@ static inline int bad_range(struct zone *zone, struct page *page)
 
 static void bad_page(struct page *page)
 {
-        void *pc = page_get_page_cgroup(page);
-
         printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG
                 "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
                 current->comm, page, (int)(2*sizeof(unsigned long)),
                 (unsigned long)page->flags, page->mapping,
                 page_mapcount(page), page_count(page));
-        if (pc) {
-                printk(KERN_EMERG "cgroup:%p\n", pc);
-                page_reset_bad_cgroup(page);
-        }
+
         printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
                 KERN_EMERG "Backtrace:\n");
         dump_stack();
@@ -268,13 +263,14 @@ void prep_compound_page(struct page *page, unsigned long order)
 {
         int i;
         int nr_pages = 1 << order;
+        struct page *p = page + 1;
 
         set_compound_page_dtor(page, free_compound_page);
         set_compound_order(page, order);
         __SetPageHead(page);
-        for (i = 1; i < nr_pages; i++) {
-                struct page *p = page + i;
-
+        for (i = 1; i < nr_pages; i++, p++) {
+                if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0))
+                        p = pfn_to_page(page_to_pfn(page) + i);
                 __SetPageTail(p);
                 p->first_page = page;
         }
@@ -284,6 +280,7 @@ static void destroy_compound_page(struct page *page, unsigned long order)
 {
         int i;
         int nr_pages = 1 << order;
+        struct page *p = page + 1;
 
         if (unlikely(compound_order(page) != order))
                 bad_page(page);
@@ -291,8 +288,9 @@ static void destroy_compound_page(struct page *page, unsigned long order)
         if (unlikely(!PageHead(page)))
                 bad_page(page);
         __ClearPageHead(page);
-        for (i = 1; i < nr_pages; i++) {
-                struct page *p = page + i;
+        for (i = 1; i < nr_pages; i++, p++) {
+                if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0))
+                        p = pfn_to_page(page_to_pfn(page) + i);
 
                 if (unlikely(!PageTail(p) |
                                 (p->first_page != page)))
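The reworked tail-page walk in prep_compound_page()/destroy_compound_page() stops relying on plain "page + i" pointer arithmetic, apparently to support compound pages larger than MAX_ORDER (e.g. gigantic hugetlb pages): with SPARSEMEM, struct pages are only guaranteed virtually contiguous within one MAX_ORDER block, so the pointer is re-derived from the pfn at each block boundary. A minimal sketch of the pattern, assuming only that MAX_ORDER_NR_PAGES is a power of two:

        /*
         * Sketch (not kernel code): walk nr_pages tail pages. Because
         * MAX_ORDER_NR_PAGES is a power of two, the mask test below is
         * equivalent to i % MAX_ORDER_NR_PAGES == 0, i.e. "i has just
         * crossed into the next MAX_ORDER block of the memmap".
         */
        struct page *p = page + 1;
        int i;

        for (i = 1; i < nr_pages; i++, p++) {
                if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0))
                        p = pfn_to_page(page_to_pfn(page) + i);
                /* ... operate on tail page p ... */
        }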
@@ -451,14 +449,16 @@ static inline void __free_one_page(struct page *page,
 
 static inline int free_pages_check(struct page *page)
 {
+        free_page_mlock(page);
         if (unlikely(page_mapcount(page) |
                 (page->mapping != NULL) |
-                (page_get_page_cgroup(page) != NULL) |
                 (page_count(page) != 0) |
                 (page->flags & PAGE_FLAGS_CHECK_AT_FREE)))
                 bad_page(page);
         if (PageDirty(page))
                 __ClearPageDirty(page);
+        if (PageSwapBacked(page))
+                __ClearPageSwapBacked(page);
         /*
          * For now, we report if PG_reserved was found set, but do not
          * clear it, and do not free the page. But we shall soon need
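free_pages_check() now calls free_page_mlock() so that a page freed while still PG_mlocked has its mlock accounting fixed up instead of tripping the bad-page check. For reference, a sketch of what such a helper plausibly looks like in this series (the real one lives in mm/internal.h and may differ in detail):

        /* Sketch, assuming TestClearPageMlocked() and an NR_MLOCK counter: */
        static inline void free_page_mlock(struct page *page)
        {
                if (unlikely(TestClearPageMlocked(page))) {
                        unsigned long flags;

                        local_irq_save(flags);
                        __dec_zone_page_state(page, NR_MLOCK);
                        __count_vm_event(UNEVICTABLE_MLOCKFREED);
                        local_irq_restore(flags);
                }
        }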
@@ -597,7 +597,6 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 {
         if (unlikely(page_mapcount(page) |
                 (page->mapping != NULL) |
-                (page_get_page_cgroup(page) != NULL) |
                 (page_count(page) != 0) |
                 (page->flags & PAGE_FLAGS_CHECK_AT_PREP)))
                 bad_page(page);
@@ -611,7 +610,11 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 
         page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim |
                         1 << PG_referenced | 1 << PG_arch_1 |
-                        1 << PG_owner_priv_1 | 1 << PG_mappedtodisk);
+                        1 << PG_owner_priv_1 | 1 << PG_mappedtodisk
+#ifdef CONFIG_UNEVICTABLE_LRU
+                        | 1 << PG_mlocked
+#endif
+                        );
         set_page_private(page, 0);
         set_page_refcounted(page);
 
@@ -1859,10 +1862,21 @@ void show_free_areas(void)
                 }
         }
 
-        printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n"
+        printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
+                " inactive_file:%lu"
+//TODO: check/adjust line lengths
+#ifdef CONFIG_UNEVICTABLE_LRU
+                " unevictable:%lu"
+#endif
+                " dirty:%lu writeback:%lu unstable:%lu\n"
                 " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
-                global_page_state(NR_ACTIVE),
-                global_page_state(NR_INACTIVE),
+                global_page_state(NR_ACTIVE_ANON),
+                global_page_state(NR_ACTIVE_FILE),
+                global_page_state(NR_INACTIVE_ANON),
+                global_page_state(NR_INACTIVE_FILE),
+#ifdef CONFIG_UNEVICTABLE_LRU
+                global_page_state(NR_UNEVICTABLE),
+#endif
                 global_page_state(NR_FILE_DIRTY),
                 global_page_state(NR_WRITEBACK),
                 global_page_state(NR_UNSTABLE_NFS),
@@ -1885,8 +1899,13 @@ void show_free_areas(void)
                         " min:%lukB"
                         " low:%lukB"
                         " high:%lukB"
-                        " active:%lukB"
-                        " inactive:%lukB"
+                        " active_anon:%lukB"
+                        " inactive_anon:%lukB"
+                        " active_file:%lukB"
+                        " inactive_file:%lukB"
+#ifdef CONFIG_UNEVICTABLE_LRU
+                        " unevictable:%lukB"
+#endif
                         " present:%lukB"
                         " pages_scanned:%lu"
                         " all_unreclaimable? %s"
@@ -1896,8 +1915,13 @@ void show_free_areas(void)
                         K(zone->pages_min),
                         K(zone->pages_low),
                         K(zone->pages_high),
-                        K(zone_page_state(zone, NR_ACTIVE)),
-                        K(zone_page_state(zone, NR_INACTIVE)),
+                        K(zone_page_state(zone, NR_ACTIVE_ANON)),
+                        K(zone_page_state(zone, NR_INACTIVE_ANON)),
+                        K(zone_page_state(zone, NR_ACTIVE_FILE)),
+                        K(zone_page_state(zone, NR_INACTIVE_FILE)),
+#ifdef CONFIG_UNEVICTABLE_LRU
+                        K(zone_page_state(zone, NR_UNEVICTABLE)),
+#endif
                         K(zone->present_pages),
                         zone->pages_scanned,
                         (zone_is_all_unreclaimable(zone) ? "yes" : "no")
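These printk changes assume the single NR_ACTIVE/NR_INACTIVE pair in enum zone_stat_item has been split into per-type counters in include/linux/mmzone.h. Roughly (a sketch, not the verbatim header):

        enum zone_stat_item {
                NR_FREE_PAGES,
                NR_LRU_BASE,
                NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_* */
                NR_ACTIVE_ANON,
                NR_INACTIVE_FILE,
                NR_ACTIVE_FILE,
        #ifdef CONFIG_UNEVICTABLE_LRU
                NR_UNEVICTABLE,
        #else
                NR_UNEVICTABLE = NR_ACTIVE_FILE, /* avoid errors in dead code */
        #endif
                /* ... remaining items unchanged ... */
                NR_VM_ZONE_STAT_ITEMS
        };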
@@ -3407,10 +3431,12 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
         pgdat->nr_zones = 0;
         init_waitqueue_head(&pgdat->kswapd_wait);
         pgdat->kswapd_max_order = 0;
+        pgdat_page_cgroup_init(pgdat);
 
         for (j = 0; j < MAX_NR_ZONES; j++) {
                 struct zone *zone = pgdat->node_zones + j;
                 unsigned long size, realsize, memmap_pages;
+                enum lru_list l;
 
                 size = zone_spanned_pages_in_node(nid, j, zones_size);
                 realsize = size - zone_absent_pages_in_node(nid, j,
@@ -3425,8 +3451,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                         PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
                 if (realsize >= memmap_pages) {
                         realsize -= memmap_pages;
-                        mminit_dprintk(MMINIT_TRACE, "memmap_init",
-                                "%s zone: %lu pages used for memmap\n",
+                        printk(KERN_DEBUG
+                                "  %s zone: %lu pages used for memmap\n",
                                 zone_names[j], memmap_pages);
                 } else
                         printk(KERN_WARNING
@@ -3436,8 +3462,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                 /* Account for reserved pages */
                 if (j == 0 && realsize > dma_reserve) {
                         realsize -= dma_reserve;
-                        mminit_dprintk(MMINIT_TRACE, "memmap_init",
-                                "%s zone: %lu pages reserved\n",
+                        printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
                                 zone_names[0], dma_reserve);
                 }
 
@@ -3462,10 +3487,14 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                 zone->prev_priority = DEF_PRIORITY;
 
                 zone_pcp_init(zone);
-                INIT_LIST_HEAD(&zone->active_list);
-                INIT_LIST_HEAD(&zone->inactive_list);
-                zone->nr_scan_active = 0;
-                zone->nr_scan_inactive = 0;
+                for_each_lru(l) {
+                        INIT_LIST_HEAD(&zone->lru[l].list);
+                        zone->lru[l].nr_scan = 0;
+                }
+                zone->recent_rotated[0] = 0;
+                zone->recent_rotated[1] = 0;
+                zone->recent_scanned[0] = 0;
+                zone->recent_scanned[1] = 0;
                 zap_zone_vm_stats(zone);
                 zone->flags = 0;
                 if (!size)
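The for_each_lru() initialisation assumes the zone's separate active/inactive lists were replaced by an array indexed by a new enum lru_list. A sketch of the shape this series appears to rely on (names as in include/linux/mmzone.h of this series; details may differ):

        #define LRU_BASE   0
        #define LRU_ACTIVE 1
        #define LRU_FILE   2

        enum lru_list {
                LRU_INACTIVE_ANON = LRU_BASE,
                LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
                LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
                LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
        #ifdef CONFIG_UNEVICTABLE_LRU
                LRU_UNEVICTABLE,
        #else
                LRU_UNEVICTABLE = LRU_ACTIVE_FILE, /* avoid errors in dead code */
        #endif
                NR_LRU_LISTS
        };

        #define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++)

        /* ...and, in struct zone, one list plus scan counter per LRU: */
        struct {
                struct list_head list;
                unsigned long nr_scan;
        } lru[NR_LRU_LISTS];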
@@ -3949,7 +3978,7 @@ static void check_for_regular_memory(pg_data_t *pgdat)
 void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 {
         unsigned long nid;
-        enum zone_type i;
+        int i;
 
         /* Sort early_node_map as initialisation assumes it is sorted */
         sort_node_map();
@@ -4207,7 +4236,7 @@ void setup_per_zone_pages_min(void)
         for_each_zone(zone) {
                 u64 tmp;
 
-                spin_lock_irqsave(&zone->lru_lock, flags);
+                spin_lock_irqsave(&zone->lock, flags);
                 tmp = (u64)pages_min * zone->present_pages;
                 do_div(tmp, lowmem_pages);
                 if (is_highmem(zone)) {
@@ -4239,13 +4268,53 @@ void setup_per_zone_pages_min(void)
                 zone->pages_low   = zone->pages_min + (tmp >> 2);
                 zone->pages_high  = zone->pages_min + (tmp >> 1);
                 setup_zone_migrate_reserve(zone);
-                spin_unlock_irqrestore(&zone->lru_lock, flags);
+                spin_unlock_irqrestore(&zone->lock, flags);
         }
 
         /* update totalreserve_pages */
         calculate_totalreserve_pages();
 }
 
+/**
+ * setup_per_zone_inactive_ratio - called when min_free_kbytes changes.
+ *
+ * The inactive anon list should be small enough that the VM never has to
+ * do too much work, but large enough that each inactive page has a chance
+ * to be referenced again before it is swapped out.
+ *
+ * The inactive_anon ratio is the target ratio of ACTIVE_ANON to
+ * INACTIVE_ANON pages on this zone's LRU, maintained by the
+ * pageout code. A zone->inactive_ratio of 3 means 3:1 or 25% of
+ * the anonymous pages are kept on the inactive list.
+ *
+ * total     target    max
+ * memory    ratio     inactive anon
+ * -------------------------------------
+ *   10MB       1         5MB
+ *  100MB       1        50MB
+ *    1GB       3       250MB
+ *   10GB      10       0.9GB
+ *  100GB      31         3GB
+ *    1TB     101        10GB
+ *   10TB     320        32GB
+ */
+void setup_per_zone_inactive_ratio(void)
+{
+        struct zone *zone;
+
+        for_each_zone(zone) {
+                unsigned int gb, ratio;
+
+                /* Zone size in gigabytes */
+                gb = zone->present_pages >> (30 - PAGE_SHIFT);
+                ratio = int_sqrt(10 * gb);
+                if (!ratio)
+                        ratio = 1;
+
+                zone->inactive_ratio = ratio;
+        }
+}
+
 /*
  * Initialise min_free_kbytes.
  *
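The target ratios in the kernel-doc table above fall straight out of int_sqrt(10 * gb): a 1GB zone gives int_sqrt(10) = 3, 100GB gives int_sqrt(1000) = 31, and 10TB gives int_sqrt(102400) = 320; below 1GB, gb truncates to 0 and the ratio is clamped to 1. The pageout code then treats the inactive anon list as too small once active pages exceed inactive * ratio. A hedged sketch of that consumer (modelled on inactive_anon_is_low() in mm/vmscan.c of this series; not verbatim):

        static int inactive_anon_is_low(struct zone *zone)
        {
                unsigned long active, inactive;

                active = zone_page_state(zone, NR_ACTIVE_ANON);
                inactive = zone_page_state(zone, NR_INACTIVE_ANON);

                /* e.g. inactive_ratio == 3: start deactivating once the
                 * active anon list outgrows 3x the inactive one. */
                if (inactive * zone->inactive_ratio < active)
                        return 1;

                return 0;
        }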
@@ -4283,6 +4352,7 @@ static int __init init_per_zone_pages_min(void)
                 min_free_kbytes = 65536;
         setup_per_zone_pages_min();
         setup_per_zone_lowmem_reserve();
+        setup_per_zone_inactive_ratio();
         return 0;
 }
 module_init(init_per_zone_pages_min)