Diffstat (limited to 'mm/page_alloc.c')
 mm/page_alloc.c | 343 ++++++++++++++++++++++++++++++---------------------------
 1 file changed, 186 insertions(+), 157 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fe14a8c87fc2..fd47494cb989 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -36,6 +36,7 @@
 #include <linux/memory_hotplug.h>
 #include <linux/nodemask.h>
 #include <linux/vmalloc.h>
+#include <linux/mempolicy.h>
 
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -53,6 +54,8 @@ unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
 long nr_swap_pages;
 
+static void fastcall free_hot_cold_page(struct page *page, int cold);
+
 /*
  * results with 256, 32 in the lowmem_reserve sysctl:
  * 1G machine -> (16M dma, 800M-16M normal, 1G-800M high)
@@ -81,6 +84,7 @@ int min_free_kbytes = 1024;
 unsigned long __initdata nr_kernel_pages;
 unsigned long __initdata nr_all_pages;
 
+#ifdef CONFIG_DEBUG_VM
 static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 {
 	int ret = 0;
@@ -122,16 +126,23 @@ static int bad_range(struct zone *zone, struct page *page)
 	return 0;
 }
 
-static void bad_page(const char *function, struct page *page)
+#else
+static inline int bad_range(struct zone *zone, struct page *page)
+{
+	return 0;
+}
+#endif
+
+static void bad_page(struct page *page)
 {
-	printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
-		function, current->comm, page);
-	printk(KERN_EMERG "flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
-		(int)(2*sizeof(unsigned long)), (unsigned long)page->flags,
-		page->mapping, page_mapcount(page), page_count(page));
-	printk(KERN_EMERG "Backtrace:\n");
+	printk(KERN_EMERG "Bad page state in process '%s'\n"
+		"page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n"
+		"Trying to fix it up, but a reboot is needed\n"
+		"Backtrace:\n",
+		current->comm, page, (int)(2*sizeof(unsigned long)),
+		(unsigned long)page->flags, page->mapping,
+		page_mapcount(page), page_count(page));
 	dump_stack();
-	printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n");
 	page->flags &= ~(1 << PG_lru |
 			1 << PG_private |
 			1 << PG_locked |
@@ -184,19 +195,15 @@ static void destroy_compound_page(struct page *page, unsigned long order)
 	int i;
 	int nr_pages = 1 << order;
 
-	if (!PageCompound(page))
-		return;
-
-	if (page[1].index != order)
-		bad_page(__FUNCTION__, page);
+	if (unlikely(page[1].index != order))
+		bad_page(page);
 
 	for (i = 0; i < nr_pages; i++) {
 		struct page *p = page + i;
 
-		if (!PageCompound(p))
-			bad_page(__FUNCTION__, page);
-		if (page_private(p) != (unsigned long)page)
-			bad_page(__FUNCTION__, page);
+		if (unlikely(!PageCompound(p) |
+				(page_private(p) != (unsigned long)page)))
+			bad_page(page);
 		ClearPageCompound(p);
 	}
 }
@@ -255,14 +262,20 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
 /*
  * This function checks whether a page is free && is the buddy
  * we can do coalesce a page and its buddy if
- * (a) the buddy is free &&
- * (b) the buddy is on the buddy system &&
- * (c) a page and its buddy have the same order.
+ * (a) the buddy is not in a hole &&
+ * (b) the buddy is free &&
+ * (c) the buddy is on the buddy system &&
+ * (d) a page and its buddy have the same order.
  * for recording page's order, we use page_private(page) and PG_private.
  *
  */
 static inline int page_is_buddy(struct page *page, int order)
 {
+#ifdef CONFIG_HOLES_IN_ZONE
+	if (!pfn_valid(page_to_pfn(page)))
+		return 0;
+#endif
+
 	if (PagePrivate(page) &&
 	    (page_order(page) == order) &&
 	    page_count(page) == 0)
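
For orientation, page_is_buddy() as it reads after this hunk (a sketch
assembled from the context lines above; the final return statements are
not shown in the hunk and are reconstructed here as an assumption):

    static inline int page_is_buddy(struct page *page, int order)
    {
    #ifdef CONFIG_HOLES_IN_ZONE
            /* In a zone with holes the candidate buddy may lie in a hole
             * and have no backing struct page at all, so validate the pfn
             * before touching any of its fields. */
            if (!pfn_valid(page_to_pfn(page)))
                    return 0;
    #endif
            if (PagePrivate(page) &&                /* order is recorded   */
                (page_order(page) == order) &&      /* same order as ours  */
                page_count(page) == 0)              /* and the page is free */
                    return 1;                       /* assumed tail */
            return 0;
    }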
@@ -300,7 +313,7 @@ static inline void __free_pages_bulk (struct page *page,
 	unsigned long page_idx;
 	int order_size = 1 << order;
 
-	if (unlikely(order))
+	if (unlikely(PageCompound(page)))
 		destroy_compound_page(page, order);
 
 	page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
@@ -314,17 +327,15 @@ static inline void __free_pages_bulk (struct page *page,
 		struct free_area *area;
 		struct page *buddy;
 
-		combined_idx = __find_combined_index(page_idx, order);
 		buddy = __page_find_buddy(page, page_idx, order);
-
-		if (bad_range(zone, buddy))
-			break;
 		if (!page_is_buddy(buddy, order))
 			break;		/* Move the buddy up one level. */
+
 		list_del(&buddy->lru);
 		area = zone->free_area + order;
 		area->nr_free--;
 		rmv_page_order(buddy);
+		combined_idx = __find_combined_index(page_idx, order);
 		page = page + (combined_idx - page_idx);
 		page_idx = combined_idx;
 		order++;
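
The helpers called in this loop are pure bit arithmetic on the page
index. The sketch below uses illustrative stand-in names for
__page_find_buddy() and __find_combined_index() as defined earlier in
this file (the real helpers also convert back to struct page):

    static inline unsigned long buddy_index(unsigned long page_idx,
                                            unsigned int order)
    {
            return page_idx ^ (1UL << order);       /* flip the order bit  */
    }

    static inline unsigned long combined_index(unsigned long page_idx,
                                               unsigned int order)
    {
            return page_idx & ~(1UL << order);      /* clear the order bit */
    }

    /* Example: page_idx = 12 (0b1100), order = 2:
     *   buddy index    = 12 ^ 4  = 8
     *   combined index = 12 & ~4 = 8
     * so the order-2 blocks at 12 and 8 merge into one order-3 block at 8.
     * Computing combined_idx only after page_is_buddy() succeeds, as this
     * hunk does, skips the calculation on the iteration that breaks out. */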
@@ -334,11 +345,11 @@ static inline void __free_pages_bulk (struct page *page,
 	zone->free_area[order].nr_free++;
 }
 
-static inline int free_pages_check(const char *function, struct page *page)
+static inline int free_pages_check(struct page *page)
 {
-	if (	page_mapcount(page) ||
-		page->mapping != NULL ||
-		page_count(page) != 0 ||
+	if (unlikely(page_mapcount(page) |
+		(page->mapping != NULL)  |
+		(page_count(page) != 0)  |
 		(page->flags & (
 			1 << PG_lru	|
 			1 << PG_private |
@@ -348,8 +359,8 @@ static inline int free_pages_check(const char *function, struct page *page)
 			1 << PG_slab	|
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_reserved )))
-		bad_page(function, page);
+			1 << PG_reserved ))))
+		bad_page(page);
 	if (PageDirty(page))
 		__ClearPageDirty(page);
 	/*
@@ -375,11 +386,10 @@ static int
 free_pages_bulk(struct zone *zone, int count,
 		struct list_head *list, unsigned int order)
 {
-	unsigned long flags;
 	struct page *page = NULL;
 	int ret = 0;
 
-	spin_lock_irqsave(&zone->lock, flags);
+	spin_lock(&zone->lock);
 	zone->all_unreclaimable = 0;
 	zone->pages_scanned = 0;
 	while (!list_empty(list) && count--) {
@@ -389,12 +399,13 @@ free_pages_bulk(struct zone *zone, int count,
 		__free_pages_bulk(page, zone, order);
 		ret++;
 	}
-	spin_unlock_irqrestore(&zone->lock, flags);
+	spin_unlock(&zone->lock);
 	return ret;
 }
 
 void __free_pages_ok(struct page *page, unsigned int order)
 {
+	unsigned long flags;
 	LIST_HEAD(list);
 	int i;
 	int reserved = 0;
@@ -408,14 +419,49 @@ void __free_pages_ok(struct page *page, unsigned int order)
 #endif
 
 	for (i = 0 ; i < (1 << order) ; ++i)
-		reserved += free_pages_check(__FUNCTION__, page + i);
+		reserved += free_pages_check(page + i);
 	if (reserved)
 		return;
 
 	list_add(&page->lru, &list);
-	mod_page_state(pgfree, 1 << order);
 	kernel_map_pages(page, 1<<order, 0);
+	local_irq_save(flags);
+	__mod_page_state(pgfree, 1 << order);
 	free_pages_bulk(page_zone(page), 1, &list, order);
+	local_irq_restore(flags);
+}
+
+/*
+ * permit the bootmem allocator to evade page validation on high-order frees
+ */
+void fastcall __init __free_pages_bootmem(struct page *page, unsigned int order)
+{
+	if (order == 0) {
+		__ClearPageReserved(page);
+		set_page_count(page, 0);
+
+		free_hot_cold_page(page, 0);
+	} else {
+		LIST_HEAD(list);
+		int loop;
+
+		for (loop = 0; loop < BITS_PER_LONG; loop++) {
+			struct page *p = &page[loop];
+
+			if (loop + 16 < BITS_PER_LONG)
+				prefetchw(p + 16);
+			__ClearPageReserved(p);
+			set_page_count(p, 0);
+		}
+
+		arch_free_page(page, order);
+
+		mod_page_state(pgfree, 1 << order);
+
+		list_add(&page->lru, &list);
+		kernel_map_pages(page, 1 << order, 0);
+		free_pages_bulk(page_zone(page), 1, &list, order);
+	}
 }
 
 
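The locking convention this hunk establishes: free_pages_bulk() now
takes only zone->lock, and the caller disables interrupts around the
whole sequence, which is what makes the non-atomic __mod_page_state()
safe. In outline (a sketch of the convention, not extra code from the
commit):

    unsigned long flags;

    local_irq_save(flags);                  /* irqs off for the section  */
    __mod_page_state(pgfree, 1 << order);   /* per-cpu counter, irqs off */
    free_pages_bulk(page_zone(page), 1, &list, order);  /* zone->lock only */
    local_irq_restore(flags);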
@@ -433,8 +479,7 @@ void __free_pages_ok(struct page *page, unsigned int order)
  *
  * -- wli
  */
-static inline struct page *
-expand(struct zone *zone, struct page *page,
+static inline void expand(struct zone *zone, struct page *page,
  	int low, int high, struct free_area *area)
 {
 	unsigned long size = 1 << high;
@@ -448,24 +493,6 @@ expand(struct zone *zone, struct page *page,
 		area->nr_free++;
 		set_page_order(&page[size], high);
 	}
-	return page;
-}
-
-void set_page_refs(struct page *page, int order)
-{
-#ifdef CONFIG_MMU
-	set_page_count(page, 1);
-#else
-	int i;
-
-	/*
-	 * We need to reference all the pages for this order, otherwise if
-	 * anyone accesses one of the pages with (get/put) it will be freed.
-	 * - eg: access_process_vm()
-	 */
-	for (i = 0; i < (1 << order); i++)
-		set_page_count(page + i, 1);
-#endif /* CONFIG_MMU */
-}
 }
 
 /*
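A worked example of expand() after this change (illustrative): a caller
asks for order 0 (low) but the smallest free block found is order 2
(high), i.e. pages P..P+3:

    /* size = 1 << high = 4
     *   iteration 1: high = 1, size = 2 -> page[2] (P+2..P+3) onto free_area[1]
     *   iteration 2: high = 0, size = 1 -> page[1]            onto free_area[0]
     *
     * Page P itself is what the caller keeps. __rmqueue() already holds
     * the start of the block, so expand() no longer needs to return it;
     * hence the void return here and the explicit "return page;" added
     * in __rmqueue() below. */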
@@ -473,9 +500,9 @@ void set_page_refs(struct page *page, int order)
  */
 static int prep_new_page(struct page *page, int order)
 {
-	if (	page_mapcount(page) ||
-		page->mapping != NULL ||
-		page_count(page) != 0 ||
+	if (unlikely(page_mapcount(page) |
+		(page->mapping != NULL)  |
+		(page_count(page) != 0)  |
 		(page->flags & (
 			1 << PG_lru	|
 			1 << PG_private	|
@@ -486,8 +513,8 @@ static int prep_new_page(struct page *page, int order)
 			1 << PG_slab	|
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_reserved )))
-		bad_page(__FUNCTION__, page);
+			1 << PG_reserved ))))
+		bad_page(page);
 
 	/*
 	 * For now, we report if PG_reserved was found set, but do not
@@ -525,7 +552,8 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order)
 		rmv_page_order(page);
 		area->nr_free--;
 		zone->free_pages -= 1UL << order;
-		return expand(zone, page, order, current_order, area);
+		expand(zone, page, order, current_order, area);
+		return page;
 	}
 
 	return NULL;
@@ -539,21 +567,17 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order)
 static int rmqueue_bulk(struct zone *zone, unsigned int order,
 			unsigned long count, struct list_head *list)
 {
-	unsigned long flags;
 	int i;
-	int allocated = 0;
-	struct page *page;
 
-	spin_lock_irqsave(&zone->lock, flags);
+	spin_lock(&zone->lock);
 	for (i = 0; i < count; ++i) {
-		page = __rmqueue(zone, order);
-		if (page == NULL)
+		struct page *page = __rmqueue(zone, order);
+		if (unlikely(page == NULL))
 			break;
-		allocated++;
 		list_add_tail(&page->lru, list);
 	}
-	spin_unlock_irqrestore(&zone->lock, flags);
-	return allocated;
+	spin_unlock(&zone->lock);
+	return i;
 }
 
 #ifdef CONFIG_NUMA
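With the early break, the loop index already counts the pages moved onto
the list, so the separate "allocated" counter was redundant. The caller
side is unchanged (sketch, mirroring the buffered_rmqueue() hunk further
down):

    /* refill an empty per-cpu list; i pages (at most pcp->batch) arrive */
    pcp->count += rmqueue_bulk(zone, 0, pcp->batch, &pcp->list);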
@@ -589,6 +613,7 @@ void drain_remote_pages(void)
 #if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
 static void __drain_pages(unsigned int cpu)
 {
+	unsigned long flags;
 	struct zone *zone;
 	int i;
 
@@ -600,8 +625,10 @@ static void __drain_pages(unsigned int cpu)
 			struct per_cpu_pages *pcp;
 
 			pcp = &pset->pcp[i];
+			local_irq_save(flags);
 			pcp->count -= free_pages_bulk(zone, pcp->count,
 						&pcp->list, 0);
+			local_irq_restore(flags);
 		}
 	}
 }
@@ -647,18 +674,14 @@ void drain_local_pages(void)
 }
 #endif /* CONFIG_PM */
 
-static void zone_statistics(struct zonelist *zonelist, struct zone *z)
+static void zone_statistics(struct zonelist *zonelist, struct zone *z, int cpu)
 {
 #ifdef CONFIG_NUMA
-	unsigned long flags;
-	int cpu;
 	pg_data_t *pg = z->zone_pgdat;
 	pg_data_t *orig = zonelist->zones[0]->zone_pgdat;
 	struct per_cpu_pageset *p;
 
-	local_irq_save(flags);
-	cpu = smp_processor_id();
-	p = zone_pcp(z,cpu);
+	p = zone_pcp(z, cpu);
 	if (pg == orig) {
 		p->numa_hit++;
 	} else {
@@ -669,14 +692,12 @@ static void zone_statistics(struct zonelist *zonelist, struct zone *z)
 		p->local_node++;
 	else
 		p->other_node++;
-	local_irq_restore(flags);
 #endif
 }
 
 /*
  * Free a 0-order page
  */
-static void FASTCALL(free_hot_cold_page(struct page *page, int cold));
 static void fastcall free_hot_cold_page(struct page *page, int cold)
 {
 	struct zone *zone = page_zone(page);
@@ -687,14 +708,14 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
 
 	if (PageAnon(page))
 		page->mapping = NULL;
-	if (free_pages_check(__FUNCTION__, page))
+	if (free_pages_check(page))
 		return;
 
-	inc_page_state(pgfree);
 	kernel_map_pages(page, 1, 0);
 
 	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 	local_irq_save(flags);
+	__inc_page_state(pgfree);
 	list_add(&page->lru, &pcp->list);
 	pcp->count++;
 	if (pcp->count >= pcp->high)
@@ -727,49 +748,58 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
  * we cheat by calling it from here, in the order > 0 path. Saves a branch
  * or two.
  */
-static struct page *
-buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
+static struct page *buffered_rmqueue(struct zonelist *zonelist,
+			struct zone *zone, int order, gfp_t gfp_flags)
 {
 	unsigned long flags;
 	struct page *page;
 	int cold = !!(gfp_flags & __GFP_COLD);
+	int cpu;
 
 again:
+	cpu = get_cpu();
 	if (order == 0) {
 		struct per_cpu_pages *pcp;
 
-		page = NULL;
-		pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+		pcp = &zone_pcp(zone, cpu)->pcp[cold];
 		local_irq_save(flags);
-		if (pcp->count <= pcp->low)
+		if (!pcp->count) {
 			pcp->count += rmqueue_bulk(zone, 0,
 						pcp->batch, &pcp->list);
-		if (pcp->count) {
-			page = list_entry(pcp->list.next, struct page, lru);
-			list_del(&page->lru);
-			pcp->count--;
+			if (unlikely(!pcp->count))
+				goto failed;
 		}
-		local_irq_restore(flags);
-		put_cpu();
+		page = list_entry(pcp->list.next, struct page, lru);
+		list_del(&page->lru);
+		pcp->count--;
 	} else {
 		spin_lock_irqsave(&zone->lock, flags);
 		page = __rmqueue(zone, order);
-		spin_unlock_irqrestore(&zone->lock, flags);
+		spin_unlock(&zone->lock);
+		if (!page)
+			goto failed;
 	}
 
-	if (page != NULL) {
-		BUG_ON(bad_range(zone, page));
-		mod_page_state_zone(zone, pgalloc, 1 << order);
-		if (prep_new_page(page, order))
-			goto again;
+	__mod_page_state_zone(zone, pgalloc, 1 << order);
+	zone_statistics(zonelist, zone, cpu);
+	local_irq_restore(flags);
+	put_cpu();
+
+	BUG_ON(bad_range(zone, page));
+	if (prep_new_page(page, order))
+		goto again;
 
 	if (gfp_flags & __GFP_ZERO)
 		prep_zero_page(page, order, gfp_flags);
 
 	if (order && (gfp_flags & __GFP_COMP))
 		prep_compound_page(page, order);
-	}
 	return page;
+
+failed:
+	local_irq_restore(flags);
+	put_cpu();
+	return NULL;
 }
 
 #define ALLOC_NO_WATERMARKS	0x01 /* don't check watermarks at all */
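The rewritten function keeps a single irq-disabled, cpu-pinned section
covering both the allocation and the statistics, which is why
zone_statistics() can now take the cpu as an argument instead of doing
its own local_irq_save()/smp_processor_id(). The control-flow skeleton
(a sketch):

    cpu = get_cpu();                        /* pin this cpu */
    local_irq_save(flags);
    /* ... take a page from the pcp list or the buddy lists ... */
    __mod_page_state_zone(zone, pgalloc, 1 << order);  /* irqs already off */
    zone_statistics(zonelist, zone, cpu);              /* no irq dance     */
    local_irq_restore(flags);
    put_cpu();
    /* every failure path must also restore irqs and drop the cpu pin,
     * which is what the new "failed:" label centralizes */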
@@ -845,9 +875,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 			continue;
 		}
 
-		page = buffered_rmqueue(*z, order, gfp_mask);
+		page = buffered_rmqueue(zonelist, *z, order, gfp_mask);
 		if (page) {
-			zone_statistics(zonelist, *z);
 			break;
 		}
 	} while (*(++z) != NULL);
@@ -903,8 +932,7 @@ restart:
 		alloc_flags |= ALLOC_HARDER;
 	if (gfp_mask & __GFP_HIGH)
 		alloc_flags |= ALLOC_HIGH;
-	if (wait)
-		alloc_flags |= ALLOC_CPUSET;
+	alloc_flags |= ALLOC_CPUSET;
 
 	/*
 	 * Go through the zonelist again. Let __GFP_HIGH and allocations
@@ -926,7 +954,7 @@ restart:
 nofail_alloc:
 			/* go through the zonelist yet again, ignoring mins */
 			page = get_page_from_freelist(gfp_mask, order,
-				zonelist, ALLOC_NO_WATERMARKS|ALLOC_CPUSET);
+				zonelist, ALLOC_NO_WATERMARKS);
 			if (page)
 				goto got_pg;
 			if (gfp_mask & __GFP_NOFAIL) {
@@ -1171,12 +1199,11 @@ EXPORT_SYMBOL(nr_pagecache);
 DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
 #endif
 
-void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
+static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
 {
 	int cpu = 0;
 
 	memset(ret, 0, sizeof(*ret));
-	cpus_and(*cpumask, *cpumask, cpu_online_map);
 
 	cpu = first_cpu(*cpumask);
 	while (cpu < NR_CPUS) {
@@ -1224,12 +1251,12 @@ void get_full_page_state(struct page_state *ret)
 	__get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
 }
 
-unsigned long __read_page_state(unsigned long offset)
+unsigned long read_page_state_offset(unsigned long offset)
 {
 	unsigned long ret = 0;
 	int cpu;
 
-	for_each_online_cpu(cpu) {
+	for_each_cpu(cpu) {
 		unsigned long in;
 
 		in = (unsigned long)&per_cpu(page_states, cpu) + offset;
@@ -1238,18 +1265,26 @@ unsigned long __read_page_state(unsigned long offset)
 	return ret;
 }
 
-void __mod_page_state(unsigned long offset, unsigned long delta)
+void __mod_page_state_offset(unsigned long offset, unsigned long delta)
+{
+	void *ptr;
+
+	ptr = &__get_cpu_var(page_states);
+	*(unsigned long *)(ptr + offset) += delta;
+}
+EXPORT_SYMBOL(__mod_page_state_offset);
+
+void mod_page_state_offset(unsigned long offset, unsigned long delta)
 {
 	unsigned long flags;
-	void* ptr;
+	void *ptr;
 
 	local_irq_save(flags);
 	ptr = &__get_cpu_var(page_states);
-	*(unsigned long*)(ptr + offset) += delta;
+	*(unsigned long *)(ptr + offset) += delta;
 	local_irq_restore(flags);
 }
-
-EXPORT_SYMBOL(__mod_page_state);
+EXPORT_SYMBOL(mod_page_state_offset);
 
 void __get_zone_counts(unsigned long *active, unsigned long *inactive,
 			unsigned long *free, struct pglist_data *pgdat)
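
The _offset suffix marks the raw entry points that take a byte offset
into struct page_state; the member-name forms used elsewhere in this
patch (mod_page_state(), __mod_page_state()) are presumably header
macros built on offsetof(), roughly as below (an assumption for
illustration, not part of this diff):

    #define mod_page_state(member, delta) \
            mod_page_state_offset(offsetof(struct page_state, member), (delta))

    #define __mod_page_state(member, delta) \
            __mod_page_state_offset(offsetof(struct page_state, member), (delta))

    /* __mod_page_state_offset() may only be used where the per-cpu counter
     * cannot be raced by an interrupt (irqs disabled, or irq context);
     * mod_page_state_offset() disables interrupts itself. */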
@@ -1335,7 +1370,7 @@ void show_free_areas(void)
 		show_node(zone);
 		printk("%s per-cpu:", zone->name);
 
-		if (!zone->present_pages) {
+		if (!populated_zone(zone)) {
 			printk(" empty\n");
 			continue;
 		} else
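
populated_zone() replaces the open-coded zone->present_pages tests used
throughout this file; presumably a trivial predicate in mmzone.h along
these lines (an assumption, its definition is not part of this diff):

    static inline int populated_zone(struct zone *zone)
    {
            return (!!zone->present_pages);
    }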
@@ -1347,10 +1382,9 @@ void show_free_areas(void)
 			pageset = zone_pcp(zone, cpu);
 
 			for (temperature = 0; temperature < 2; temperature++)
-				printk("cpu %d %s: low %d, high %d, batch %d used:%d\n",
+				printk("cpu %d %s: high %d, batch %d used:%d\n",
 					cpu,
 					temperature ? "cold" : "hot",
-					pageset->pcp[temperature].low,
 					pageset->pcp[temperature].high,
 					pageset->pcp[temperature].batch,
 					pageset->pcp[temperature].count);
@@ -1413,7 +1447,7 @@ void show_free_areas(void)
 
 		show_node(zone);
 		printk("%s: ", zone->name);
-		if (!zone->present_pages) {
+		if (!populated_zone(zone)) {
 			printk("empty\n");
 			continue;
 		}
@@ -1433,36 +1467,29 @@ void show_free_areas(void)
 
 /*
  * Builds allocation fallback zone lists.
+ *
+ * Add all populated zones of a node to the zonelist.
  */
-static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, int j, int k)
-{
-	switch (k) {
+static int __init build_zonelists_node(pg_data_t *pgdat,
+			struct zonelist *zonelist, int nr_zones, int zone_type)
+{
 		struct zone *zone;
-	default:
-		BUG();
-	case ZONE_HIGHMEM:
-		zone = pgdat->node_zones + ZONE_HIGHMEM;
-		if (zone->present_pages) {
+
+	BUG_ON(zone_type > ZONE_HIGHMEM);
+
+	do {
+		zone = pgdat->node_zones + zone_type;
+		if (populated_zone(zone)) {
 #ifndef CONFIG_HIGHMEM
-			BUG();
+			BUG_ON(zone_type > ZONE_NORMAL);
 #endif
-			zonelist->zones[j++] = zone;
+			zonelist->zones[nr_zones++] = zone;
+			check_highest_zone(zone_type);
 		}
-	case ZONE_NORMAL:
-		zone = pgdat->node_zones + ZONE_NORMAL;
-		if (zone->present_pages)
-			zonelist->zones[j++] = zone;
-	case ZONE_DMA32:
-		zone = pgdat->node_zones + ZONE_DMA32;
-		if (zone->present_pages)
-			zonelist->zones[j++] = zone;
-	case ZONE_DMA:
-		zone = pgdat->node_zones + ZONE_DMA;
-		if (zone->present_pages)
-			zonelist->zones[j++] = zone;
-	}
+		zone_type--;
 
-	return j;
+	} while (zone_type >= 0);
+	return nr_zones;
 }
 
 static inline int highest_zone(int zone_bits)
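
An example of the rewritten loop (illustrative): building the
ZONE_HIGHMEM fallback list for a node where only ZONE_DMA and
ZONE_NORMAL are populated:

    /* zone_type = ZONE_HIGHMEM -> not populated, skipped
     * zone_type = ZONE_NORMAL  -> zonelist->zones[0] = node_zones + ZONE_NORMAL
     * zone_type = ZONE_DMA32   -> not populated, skipped
     * zone_type = ZONE_DMA     -> zonelist->zones[1] = node_zones + ZONE_DMA
     *
     * The do/while walks from the requested zone type downward, the same
     * order the removed fall-through switch encoded one case at a time,
     * and it picks up ZONE_DMA32 without needing a dedicated case. */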
@@ -1709,8 +1736,6 @@ void __devinit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 	for (pfn = start_pfn; pfn < end_pfn; pfn++, page++) {
 		if (!early_pfn_valid(pfn))
 			continue;
-		if (!early_pfn_in_nid(pfn, nid))
-			continue;
 		page = pfn_to_page(pfn);
 		set_page_links(page, zone, nid, pfn);
 		set_page_count(page, 1);
@@ -1794,14 +1819,12 @@ inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 
 	pcp = &p->pcp[0];		/* hot */
 	pcp->count = 0;
-	pcp->low = 0;
 	pcp->high = 6 * batch;
 	pcp->batch = max(1UL, 1 * batch);
 	INIT_LIST_HEAD(&pcp->list);
 
 	pcp = &p->pcp[1];		/* cold*/
 	pcp->count = 0;
-	pcp->low = 0;
 	pcp->high = 2 * batch;
 	pcp->batch = max(1UL, batch/2);
 	INIT_LIST_HEAD(&pcp->list);
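
With the ->low watermark gone, a per-cpu list is refilled only when it
is completely empty (the new "!pcp->count" test in buffered_rmqueue()
above) and trimmed when it grows past ->high. With batch = 16, purely
as an illustration:

    /* hot  list: high = 6 * 16 = 96; an empty list is refilled with 16 pages
     * cold list: high = 2 * 16 = 32; an empty list is refilled with 8 pages
     *            (batch/2 = 8, from the max(1UL, batch/2) above) */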
@@ -2116,7 +2139,7 @@ static int frag_show(struct seq_file *m, void *arg)
 	int order;
 
 	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
-		if (!zone->present_pages)
+		if (!populated_zone(zone))
 			continue;
 
 		spin_lock_irqsave(&zone->lock, flags);
@@ -2149,7 +2172,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
 	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
 		int i;
 
-		if (!zone->present_pages)
+		if (!populated_zone(zone))
 			continue;
 
 		spin_lock_irqsave(&zone->lock, flags);
@@ -2197,12 +2220,10 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
 			seq_printf(m,
 				   "\n    cpu: %i pcp: %i"
 				   "\n              count: %i"
-				   "\n              low:   %i"
 				   "\n              high:  %i"
 				   "\n              batch: %i",
 				   i, j,
 				   pageset->pcp[j].count,
-				   pageset->pcp[j].low,
 				   pageset->pcp[j].high,
 				   pageset->pcp[j].batch);
 		}
@@ -2257,32 +2278,40 @@ static char *vmstat_text[] = {
 	"pgpgout",
 	"pswpin",
 	"pswpout",
-	"pgalloc_high",
 
+	"pgalloc_high",
 	"pgalloc_normal",
+	"pgalloc_dma32",
 	"pgalloc_dma",
+
 	"pgfree",
 	"pgactivate",
 	"pgdeactivate",
 
 	"pgfault",
 	"pgmajfault",
+
 	"pgrefill_high",
 	"pgrefill_normal",
+	"pgrefill_dma32",
 	"pgrefill_dma",
 
 	"pgsteal_high",
 	"pgsteal_normal",
+	"pgsteal_dma32",
 	"pgsteal_dma",
+
 	"pgscan_kswapd_high",
 	"pgscan_kswapd_normal",
-
+	"pgscan_kswapd_dma32",
 	"pgscan_kswapd_dma",
+
 	"pgscan_direct_high",
 	"pgscan_direct_normal",
+	"pgscan_direct_dma32",
 	"pgscan_direct_dma",
-	"pginodesteal",
 
+	"pginodesteal",
 	"slabs_scanned",
 	"kswapd_steal",
 	"kswapd_inodesteal",