Diffstat (limited to 'mm/page_alloc.c')
 mm/page_alloc.c | 155 ++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 120 insertions(+), 35 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 27b8681139fd..d8ac01474563 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -44,7 +44,7 @@
 #include <linux/backing-dev.h>
 #include <linux/fault-inject.h>
 #include <linux/page-isolation.h>
-#include <linux/memcontrol.h>
+#include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 
 #include <asm/tlbflush.h>
@@ -223,17 +223,12 @@ static inline int bad_range(struct zone *zone, struct page *page)
 
 static void bad_page(struct page *page)
 {
-	void *pc = page_get_page_cgroup(page);
-
 	printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG
 		"page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
 		current->comm, page, (int)(2*sizeof(unsigned long)),
 		(unsigned long)page->flags, page->mapping,
 		page_mapcount(page), page_count(page));
-	if (pc) {
-		printk(KERN_EMERG "cgroup:%p\n", pc);
-		page_reset_bad_cgroup(page);
-	}
+
 	printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
 		KERN_EMERG "Backtrace:\n");
 	dump_stack();
@@ -268,24 +263,39 @@ void prep_compound_page(struct page *page, unsigned long order)
 {
 	int i;
 	int nr_pages = 1 << order;
+
+	set_compound_page_dtor(page, free_compound_page);
+	set_compound_order(page, order);
+	__SetPageHead(page);
+	for (i = 1; i < nr_pages; i++) {
+		struct page *p = page + i;
+
+		__SetPageTail(p);
+		p->first_page = page;
+	}
+}
+
+#ifdef CONFIG_HUGETLBFS
+void prep_compound_gigantic_page(struct page *page, unsigned long order)
+{
+	int i;
+	int nr_pages = 1 << order;
 	struct page *p = page + 1;
 
 	set_compound_page_dtor(page, free_compound_page);
 	set_compound_order(page, order);
 	__SetPageHead(page);
-	for (i = 1; i < nr_pages; i++, p++) {
-		if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0))
-			p = pfn_to_page(page_to_pfn(page) + i);
+	for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
 		__SetPageTail(p);
 		p->first_page = page;
 	}
 }
+#endif
 
 static void destroy_compound_page(struct page *page, unsigned long order)
 {
 	int i;
 	int nr_pages = 1 << order;
-	struct page *p = page + 1;
 
 	if (unlikely(compound_order(page) != order))
 		bad_page(page);
@@ -293,9 +303,8 @@ static void destroy_compound_page(struct page *page, unsigned long order)
 	if (unlikely(!PageHead(page)))
 		bad_page(page);
 	__ClearPageHead(page);
-	for (i = 1; i < nr_pages; i++, p++) {
-		if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0))
-			p = pfn_to_page(page_to_pfn(page) + i);
+	for (i = 1; i < nr_pages; i++) {
+		struct page *p = page + i;
 
 		if (unlikely(!PageTail(p) |
 				(p->first_page != page)))
@@ -454,14 +463,16 @@ static inline void __free_one_page(struct page *page,
 
 static inline int free_pages_check(struct page *page)
 {
+	free_page_mlock(page);
 	if (unlikely(page_mapcount(page) |
 		(page->mapping != NULL) |
-		(page_get_page_cgroup(page) != NULL) |
 		(page_count(page) != 0) |
 		(page->flags & PAGE_FLAGS_CHECK_AT_FREE)))
 		bad_page(page);
 	if (PageDirty(page))
 		__ClearPageDirty(page);
+	if (PageSwapBacked(page))
+		__ClearPageSwapBacked(page);
 	/*
 	 * For now, we report if PG_reserved was found set, but do not
 	 * clear it, and do not free the page. But we shall soon need
@@ -600,7 +611,6 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 {
 	if (unlikely(page_mapcount(page) |
 		(page->mapping != NULL) |
-		(page_get_page_cgroup(page) != NULL) |
 		(page_count(page) != 0) |
 		(page->flags & PAGE_FLAGS_CHECK_AT_PREP)))
 		bad_page(page);
@@ -614,7 +624,11 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 
 	page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim |
 			1 << PG_referenced | 1 << PG_arch_1 |
-			1 << PG_owner_priv_1 | 1 << PG_mappedtodisk);
+			1 << PG_owner_priv_1 | 1 << PG_mappedtodisk
+#ifdef CONFIG_UNEVICTABLE_LRU
+			| 1 << PG_mlocked
+#endif
+			);
 	set_page_private(page, 0);
 	set_page_refcounted(page);
 
@@ -1547,6 +1561,10 @@ nofail_alloc:
 
 	/* We now go into synchronous reclaim */
 	cpuset_memory_pressure_bump();
+	/*
+	 * The task's cpuset might have expanded its set of allowable nodes
+	 */
+	cpuset_update_task_memory_state();
 	p->flags |= PF_MEMALLOC;
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
@@ -1862,10 +1880,21 @@ void show_free_areas(void)
 		}
 	}
 
-	printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n"
+	printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
+		" inactive_file:%lu"
+//TODO:  check/adjust line lengths
+#ifdef CONFIG_UNEVICTABLE_LRU
+		" unevictable:%lu"
+#endif
+		" dirty:%lu writeback:%lu unstable:%lu\n"
 		" free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
-		global_page_state(NR_ACTIVE),
-		global_page_state(NR_INACTIVE),
+		global_page_state(NR_ACTIVE_ANON),
+		global_page_state(NR_ACTIVE_FILE),
+		global_page_state(NR_INACTIVE_ANON),
+		global_page_state(NR_INACTIVE_FILE),
+#ifdef CONFIG_UNEVICTABLE_LRU
+		global_page_state(NR_UNEVICTABLE),
+#endif
 		global_page_state(NR_FILE_DIRTY),
 		global_page_state(NR_WRITEBACK),
 		global_page_state(NR_UNSTABLE_NFS),
@@ -1888,8 +1917,13 @@ void show_free_areas(void)
 			" min:%lukB"
 			" low:%lukB"
 			" high:%lukB"
-			" active:%lukB"
-			" inactive:%lukB"
+			" active_anon:%lukB"
+			" inactive_anon:%lukB"
+			" active_file:%lukB"
+			" inactive_file:%lukB"
+#ifdef CONFIG_UNEVICTABLE_LRU
+			" unevictable:%lukB"
+#endif
 			" present:%lukB"
 			" pages_scanned:%lu"
 			" all_unreclaimable? %s"
@@ -1899,8 +1933,13 @@ void show_free_areas(void)
 			K(zone->pages_min),
 			K(zone->pages_low),
 			K(zone->pages_high),
-			K(zone_page_state(zone, NR_ACTIVE)),
-			K(zone_page_state(zone, NR_INACTIVE)),
+			K(zone_page_state(zone, NR_ACTIVE_ANON)),
+			K(zone_page_state(zone, NR_INACTIVE_ANON)),
+			K(zone_page_state(zone, NR_ACTIVE_FILE)),
+			K(zone_page_state(zone, NR_INACTIVE_FILE)),
+#ifdef CONFIG_UNEVICTABLE_LRU
+			K(zone_page_state(zone, NR_UNEVICTABLE)),
+#endif
 			K(zone->present_pages),
 			zone->pages_scanned,
 			(zone_is_all_unreclaimable(zone) ? "yes" : "no")
@@ -3410,10 +3449,12 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 	pgdat->nr_zones = 0;
 	init_waitqueue_head(&pgdat->kswapd_wait);
 	pgdat->kswapd_max_order = 0;
+	pgdat_page_cgroup_init(pgdat);
 
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
 		unsigned long size, realsize, memmap_pages;
+		enum lru_list l;
 
 		size = zone_spanned_pages_in_node(nid, j, zones_size);
 		realsize = size - zone_absent_pages_in_node(nid, j,
@@ -3428,8 +3469,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 			PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
 		if (realsize >= memmap_pages) {
 			realsize -= memmap_pages;
-			mminit_dprintk(MMINIT_TRACE, "memmap_init",
-				"%s zone: %lu pages used for memmap\n",
+			printk(KERN_DEBUG
+				" %s zone: %lu pages used for memmap\n",
 				zone_names[j], memmap_pages);
 		} else
 			printk(KERN_WARNING
@@ -3439,8 +3480,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		/* Account for reserved pages */
 		if (j == 0 && realsize > dma_reserve) {
 			realsize -= dma_reserve;
-			mminit_dprintk(MMINIT_TRACE, "memmap_init",
-				"%s zone: %lu pages reserved\n",
+			printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
 				zone_names[0], dma_reserve);
 		}
 
@@ -3465,10 +3505,14 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		zone->prev_priority = DEF_PRIORITY;
 
 		zone_pcp_init(zone);
-		INIT_LIST_HEAD(&zone->active_list);
-		INIT_LIST_HEAD(&zone->inactive_list);
-		zone->nr_scan_active = 0;
-		zone->nr_scan_inactive = 0;
+		for_each_lru(l) {
+			INIT_LIST_HEAD(&zone->lru[l].list);
+			zone->lru[l].nr_scan = 0;
+		}
+		zone->recent_rotated[0] = 0;
+		zone->recent_rotated[1] = 0;
+		zone->recent_scanned[0] = 0;
+		zone->recent_scanned[1] = 0;
 		zap_zone_vm_stats(zone);
 		zone->flags = 0;
 		if (!size)
@@ -3952,7 +3996,7 @@ static void check_for_regular_memory(pg_data_t *pgdat)
 void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 {
 	unsigned long nid;
-	enum zone_type i;
+	int i;
 
 	/* Sort early_node_map as initialisation assumes it is sorted */
 	sort_node_map();
@@ -4210,7 +4254,7 @@ void setup_per_zone_pages_min(void)
 	for_each_zone(zone) {
 		u64 tmp;
 
-		spin_lock_irqsave(&zone->lru_lock, flags);
+		spin_lock_irqsave(&zone->lock, flags);
 		tmp = (u64)pages_min * zone->present_pages;
 		do_div(tmp, lowmem_pages);
 		if (is_highmem(zone)) {
@@ -4242,13 +4286,53 @@ void setup_per_zone_pages_min(void)
 		zone->pages_low = zone->pages_min + (tmp >> 2);
 		zone->pages_high = zone->pages_min + (tmp >> 1);
 		setup_zone_migrate_reserve(zone);
-		spin_unlock_irqrestore(&zone->lru_lock, flags);
+		spin_unlock_irqrestore(&zone->lock, flags);
 	}
 
 	/* update totalreserve_pages */
 	calculate_totalreserve_pages();
 }
 
+/**
+ * setup_per_zone_inactive_ratio - called when min_free_kbytes changes.
+ *
+ * The inactive anon list should be small enough that the VM never has to
+ * do too much work, but large enough that each inactive page has a chance
+ * to be referenced again before it is swapped out.
+ *
+ * The inactive_anon ratio is the target ratio of ACTIVE_ANON to
+ * INACTIVE_ANON pages on this zone's LRU, maintained by the
+ * pageout code. A zone->inactive_ratio of 3 means 3:1 or 25% of
+ * the anonymous pages are kept on the inactive list.
+ *
+ *  total     target    max
+ *  memory    ratio     inactive anon
+ * -------------------------------------
+ *   10MB       1         5MB
+ *  100MB       1        50MB
+ *    1GB       3       250MB
+ *   10GB      10       0.9GB
+ *  100GB      31         3GB
+ *    1TB     101        10GB
+ *   10TB     320        32GB
+ */
+void setup_per_zone_inactive_ratio(void)
+{
+	struct zone *zone;
+
+	for_each_zone(zone) {
+		unsigned int gb, ratio;
+
+		/* Zone size in gigabytes */
+		gb = zone->present_pages >> (30 - PAGE_SHIFT);
+		ratio = int_sqrt(10 * gb);
+		if (!ratio)
+			ratio = 1;
+
+		zone->inactive_ratio = ratio;
+	}
+}
+
 /*
  * Initialise min_free_kbytes.
  *
@@ -4286,6 +4370,7 @@ static int __init init_per_zone_pages_min(void)
 		min_free_kbytes = 65536;
 	setup_per_zone_pages_min();
 	setup_per_zone_lowmem_reserve();
+	setup_per_zone_inactive_ratio();
 	return 0;
 }
 module_init(init_per_zone_pages_min)
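
A note on the inactive_ratio table in the setup_per_zone_inactive_ratio() comment above: the ratio is simply the integer square root of ten times the zone size in whole gigabytes, clamped to a minimum of 1, exactly as the added function computes it. The short userspace sketch below is not kernel code; it stands in for the kernel's int_sqrt() with floor(sqrt()) and uses made-up zone sizes purely to reproduce the 1GB through 10TB rows of that table:

	/* check_inactive_ratio.c - reproduce the zone->inactive_ratio table
	 * from the setup_per_zone_inactive_ratio() comment (userspace sketch).
	 * Build with:  cc -o check_inactive_ratio check_inactive_ratio.c -lm
	 */
	#include <math.h>
	#include <stdio.h>

	int main(void)
	{
		/* Zone sizes in whole gigabytes: 1GB, 10GB, 100GB, 1TB, 10TB */
		unsigned long sizes_gb[] = { 1, 10, 100, 1024, 10240 };
		unsigned int i;

		for (i = 0; i < sizeof(sizes_gb) / sizeof(sizes_gb[0]); i++) {
			unsigned long gb = sizes_gb[i];
			/* Kernel uses int_sqrt(); floor(sqrt()) matches it here */
			unsigned int ratio = (unsigned int)sqrt(10.0 * (double)gb);

			if (!ratio)
				ratio = 1;
			/* Expected output: 3, 10, 31, 101, 320 */
			printf("%6lu GB -> inactive_ratio %u\n", gb, ratio);
		}
		return 0;
	}

For the 10MB and 100MB rows the gigabyte count truncates to zero, which is what the clamp to 1 covers; with inactive_ratio == 3 the pageout code aims to keep roughly one quarter of the anonymous pages on the inactive list, matching the "3:1 or 25%" wording in the comment.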