author		Steve French <sfrench@us.ibm.com>	2006-01-12 17:47:08 -0500
committer	Steve French <sfrench@us.ibm.com>	2006-01-12 17:47:08 -0500
commit		94bc2be31a01a3055ec94176e595dfe208e92d3b (patch)
tree		ebfbe81c6718a6390bfa1b99c6d228237d818576 /mm/page_alloc.c
parent		c32a0b689cb9cc160cfcd19735bbf50bb70c6ef4 (diff)
parent		58cba4650a7a414eabd2b40cc9d8e45fcdf192d9 (diff)
Merge with /pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Signed-off-by: Steve French <sfrench@us.ibm.com>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	472
1 file changed, 278 insertions(+), 194 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fe14a8c87fc2..8c960b469593 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -36,6 +36,7 @@
 #include <linux/memory_hotplug.h>
 #include <linux/nodemask.h>
 #include <linux/vmalloc.h>
+#include <linux/mempolicy.h>
 
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -52,6 +53,9 @@ struct pglist_data *pgdat_list __read_mostly;
 unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
 long nr_swap_pages;
+int percpu_pagelist_fraction;
+
+static void fastcall free_hot_cold_page(struct page *page, int cold);
 
 /*
  * results with 256, 32 in the lowmem_reserve sysctl:
@@ -81,6 +85,7 @@ int min_free_kbytes = 1024;
 unsigned long __initdata nr_kernel_pages;
 unsigned long __initdata nr_all_pages;
 
+#ifdef CONFIG_DEBUG_VM
 static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 {
         int ret = 0;
@@ -122,16 +127,23 @@ static int bad_range(struct zone *zone, struct page *page)
         return 0;
 }
 
-static void bad_page(const char *function, struct page *page)
+#else
+static inline int bad_range(struct zone *zone, struct page *page)
 {
-        printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
-                function, current->comm, page);
-        printk(KERN_EMERG "flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
-                (int)(2*sizeof(unsigned long)), (unsigned long)page->flags,
-                page->mapping, page_mapcount(page), page_count(page));
-        printk(KERN_EMERG "Backtrace:\n");
+        return 0;
+}
+#endif
+
+static void bad_page(struct page *page)
+{
+        printk(KERN_EMERG "Bad page state in process '%s'\n"
+                KERN_EMERG "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n"
+                KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
+                KERN_EMERG "Backtrace:\n",
+                current->comm, page, (int)(2*sizeof(unsigned long)),
+                (unsigned long)page->flags, page->mapping,
+                page_mapcount(page), page_count(page));
         dump_stack();
-        printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n");
         page->flags &= ~(1 << PG_lru |
                         1 << PG_private |
                         1 << PG_locked |
@@ -184,19 +196,15 @@ static void destroy_compound_page(struct page *page, unsigned long order)
         int i;
         int nr_pages = 1 << order;
 
-        if (!PageCompound(page))
-                return;
-
-        if (page[1].index != order)
-                bad_page(__FUNCTION__, page);
+        if (unlikely(page[1].index != order))
+                bad_page(page);
 
         for (i = 0; i < nr_pages; i++) {
                 struct page *p = page + i;
 
-                if (!PageCompound(p))
-                        bad_page(__FUNCTION__, page);
-                if (page_private(p) != (unsigned long)page)
-                        bad_page(__FUNCTION__, page);
+                if (unlikely(!PageCompound(p) |
+                                (page_private(p) != (unsigned long)page)))
+                        bad_page(page);
                 ClearPageCompound(p);
         }
 }
@@ -255,14 +263,20 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
 /*
  * This function checks whether a page is free && is the buddy
  * we can do coalesce a page and its buddy if
- * (a) the buddy is free &&
- * (b) the buddy is on the buddy system &&
- * (c) a page and its buddy have the same order.
+ * (a) the buddy is not in a hole &&
+ * (b) the buddy is free &&
+ * (c) the buddy is on the buddy system &&
+ * (d) a page and its buddy have the same order.
  * for recording page's order, we use page_private(page) and PG_private.
  *
  */
 static inline int page_is_buddy(struct page *page, int order)
 {
+#ifdef CONFIG_HOLES_IN_ZONE
+        if (!pfn_valid(page_to_pfn(page)))
+                return 0;
+#endif
+
         if (PagePrivate(page) &&
             (page_order(page) == order) &&
              page_count(page) == 0)
@@ -294,13 +308,13 @@ static inline int page_is_buddy(struct page *page, int order)
  * -- wli
  */
 
-static inline void __free_pages_bulk (struct page *page,
+static inline void __free_one_page(struct page *page,
                 struct zone *zone, unsigned int order)
 {
         unsigned long page_idx;
         int order_size = 1 << order;
 
-        if (unlikely(order))
+        if (unlikely(PageCompound(page)))
                 destroy_compound_page(page, order);
 
         page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
@@ -314,17 +328,15 @@ static inline void __free_pages_bulk (struct page *page,
                 struct free_area *area;
                 struct page *buddy;
 
-                combined_idx = __find_combined_index(page_idx, order);
                 buddy = __page_find_buddy(page, page_idx, order);
-
-                if (bad_range(zone, buddy))
-                        break;
                 if (!page_is_buddy(buddy, order))
                         break; /* Move the buddy up one level. */
+
                 list_del(&buddy->lru);
                 area = zone->free_area + order;
                 area->nr_free--;
                 rmv_page_order(buddy);
+                combined_idx = __find_combined_index(page_idx, order);
                 page = page + (combined_idx - page_idx);
                 page_idx = combined_idx;
                 order++;
@@ -334,11 +346,11 @@ static inline void __free_pages_bulk (struct page *page,
         zone->free_area[order].nr_free++;
 }
 
-static inline int free_pages_check(const char *function, struct page *page)
+static inline int free_pages_check(struct page *page)
 {
-        if (page_mapcount(page) ||
-                page->mapping != NULL ||
-                page_count(page) != 0 ||
+        if (unlikely(page_mapcount(page) |
+                (page->mapping != NULL)  |
+                (page_count(page) != 0)  |
                 (page->flags & (
                         1 << PG_lru |
                         1 << PG_private |
@@ -348,8 +360,8 @@ static inline int free_pages_check(const char *function, struct page *page)
                         1 << PG_slab |
                         1 << PG_swapcache |
                         1 << PG_writeback |
-                        1 << PG_reserved )))
-                bad_page(function, page);
+                        1 << PG_reserved ))))
+                bad_page(page);
         if (PageDirty(page))
                 __ClearPageDirty(page);
         /*
@@ -371,51 +383,90 @@ static inline int free_pages_check(const char *function, struct page *page)
  * And clear the zone's pages_scanned counter, to hold off the "all pages are
  * pinned" detection logic.
  */
-static int
-free_pages_bulk(struct zone *zone, int count,
-                struct list_head *list, unsigned int order)
+static void free_pages_bulk(struct zone *zone, int count,
+                struct list_head *list, int order)
 {
-        unsigned long flags;
-        struct page *page = NULL;
-        int ret = 0;
-
-        spin_lock_irqsave(&zone->lock, flags);
+        spin_lock(&zone->lock);
         zone->all_unreclaimable = 0;
         zone->pages_scanned = 0;
-        while (!list_empty(list) && count--) {
+        while (count--) {
+                struct page *page;
+
+                BUG_ON(list_empty(list));
                 page = list_entry(list->prev, struct page, lru);
-                /* have to delete it as __free_pages_bulk list manipulates */
+                /* have to delete it as __free_one_page list manipulates */
                 list_del(&page->lru);
-                __free_pages_bulk(page, zone, order);
-                ret++;
+                __free_one_page(page, zone, order);
         }
-        spin_unlock_irqrestore(&zone->lock, flags);
-        return ret;
+        spin_unlock(&zone->lock);
 }
 
-void __free_pages_ok(struct page *page, unsigned int order)
+static void free_one_page(struct zone *zone, struct page *page, int order)
 {
         LIST_HEAD(list);
+        list_add(&page->lru, &list);
+        free_pages_bulk(zone, 1, &list, order);
+}
+
+static void __free_pages_ok(struct page *page, unsigned int order)
+{
+        unsigned long flags;
         int i;
         int reserved = 0;
 
         arch_free_page(page, order);
+        if (!PageHighMem(page))
+                mutex_debug_check_no_locks_freed(page_address(page),
+                                                 PAGE_SIZE<<order);
 
 #ifndef CONFIG_MMU
-        if (order > 0)
-                for (i = 1 ; i < (1 << order) ; ++i)
-                        __put_page(page + i);
+        for (i = 1 ; i < (1 << order) ; ++i)
+                __put_page(page + i);
 #endif
 
         for (i = 0 ; i < (1 << order) ; ++i)
-                reserved += free_pages_check(__FUNCTION__, page + i);
+                reserved += free_pages_check(page + i);
         if (reserved)
                 return;
 
-        list_add(&page->lru, &list);
-        mod_page_state(pgfree, 1 << order);
-        kernel_map_pages(page, 1<<order, 0);
-        free_pages_bulk(page_zone(page), 1, &list, order);
+        kernel_map_pages(page, 1 << order, 0);
+        local_irq_save(flags);
+        __mod_page_state(pgfree, 1 << order);
+        free_one_page(page_zone(page), page, order);
+        local_irq_restore(flags);
+}
+
+/*
+ * permit the bootmem allocator to evade page validation on high-order frees
+ */
+void fastcall __init __free_pages_bootmem(struct page *page, unsigned int order)
+{
+        if (order == 0) {
+                __ClearPageReserved(page);
+                set_page_count(page, 0);
+
+                free_hot_cold_page(page, 0);
+        } else {
+                LIST_HEAD(list);
+                int loop;
+
+                for (loop = 0; loop < BITS_PER_LONG; loop++) {
+                        struct page *p = &page[loop];
+
+                        if (loop + 16 < BITS_PER_LONG)
+                                prefetchw(p + 16);
+                        __ClearPageReserved(p);
+                        set_page_count(p, 0);
+                }
+
+                arch_free_page(page, order);
+
+                mod_page_state(pgfree, 1 << order);
+
+                list_add(&page->lru, &list);
+                kernel_map_pages(page, 1 << order, 0);
+                free_pages_bulk(page_zone(page), 1, &list, order);
+        }
 }
 
 
@@ -433,8 +484,7 @@ void __free_pages_ok(struct page *page, unsigned int order)
  *
  * -- wli
  */
-static inline struct page *
-expand(struct zone *zone, struct page *page,
+static inline void expand(struct zone *zone, struct page *page,
         int low, int high, struct free_area *area)
 {
         unsigned long size = 1 << high;
@@ -448,24 +498,6 @@ expand(struct zone *zone, struct page *page,
                 area->nr_free++;
                 set_page_order(&page[size], high);
         }
-        return page;
-}
-
-void set_page_refs(struct page *page, int order)
-{
-#ifdef CONFIG_MMU
-        set_page_count(page, 1);
-#else
-        int i;
-
-        /*
-         * We need to reference all the pages for this order, otherwise if
-         * anyone accesses one of the pages with (get/put) it will be freed.
-         * - eg: access_process_vm()
-         */
-        for (i = 0; i < (1 << order); i++)
-                set_page_count(page + i, 1);
-#endif /* CONFIG_MMU */
 }
 
 /*
@@ -473,9 +505,9 @@ void set_page_refs(struct page *page, int order)
  */
 static int prep_new_page(struct page *page, int order)
 {
-        if (page_mapcount(page) ||
-                page->mapping != NULL ||
-                page_count(page) != 0 ||
+        if (unlikely(page_mapcount(page) |
+                (page->mapping != NULL)  |
+                (page_count(page) != 0)  |
                 (page->flags & (
                         1 << PG_lru |
                         1 << PG_private |
@@ -486,8 +518,8 @@ static int prep_new_page(struct page *page, int order)
                         1 << PG_slab |
                         1 << PG_swapcache |
                         1 << PG_writeback |
-                        1 << PG_reserved )))
-                bad_page(__FUNCTION__, page);
+                        1 << PG_reserved ))))
+                bad_page(page);
 
         /*
          * For now, we report if PG_reserved was found set, but do not
@@ -525,7 +557,8 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order)
                 rmv_page_order(page);
                 area->nr_free--;
                 zone->free_pages -= 1UL << order;
-                return expand(zone, page, order, current_order, area);
+                expand(zone, page, order, current_order, area);
+                return page;
         }
 
         return NULL;
@@ -539,21 +572,17 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order)
 static int rmqueue_bulk(struct zone *zone, unsigned int order,
                         unsigned long count, struct list_head *list)
 {
-        unsigned long flags;
         int i;
-        int allocated = 0;
-        struct page *page;
 
-        spin_lock_irqsave(&zone->lock, flags);
+        spin_lock(&zone->lock);
         for (i = 0; i < count; ++i) {
-                page = __rmqueue(zone, order);
-                if (page == NULL)
+                struct page *page = __rmqueue(zone, order);
+                if (unlikely(page == NULL))
                         break;
-                allocated++;
                 list_add_tail(&page->lru, list);
         }
-        spin_unlock_irqrestore(&zone->lock, flags);
-        return allocated;
+        spin_unlock(&zone->lock);
+        return i;
 }
 
 #ifdef CONFIG_NUMA
@@ -572,14 +601,13 @@ void drain_remote_pages(void)
                 if (zone->zone_pgdat->node_id == numa_node_id())
                         continue;
 
-                pset = zone->pageset[smp_processor_id()];
+                pset = zone_pcp(zone, smp_processor_id());
                 for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
                         struct per_cpu_pages *pcp;
 
                         pcp = &pset->pcp[i];
-                        if (pcp->count)
-                                pcp->count -= free_pages_bulk(zone, pcp->count,
-                                                &pcp->list, 0);
+                        free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+                        pcp->count = 0;
                 }
         }
         local_irq_restore(flags);
@@ -589,6 +617,7 @@ void drain_remote_pages(void)
 #if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
 static void __drain_pages(unsigned int cpu)
 {
+        unsigned long flags;
         struct zone *zone;
         int i;
 
@@ -600,8 +629,10 @@ static void __drain_pages(unsigned int cpu)
                         struct per_cpu_pages *pcp;
 
                         pcp = &pset->pcp[i];
-                        pcp->count -= free_pages_bulk(zone, pcp->count,
-                                        &pcp->list, 0);
+                        local_irq_save(flags);
+                        free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+                        pcp->count = 0;
+                        local_irq_restore(flags);
                 }
         }
 }
@@ -647,18 +678,14 @@ void drain_local_pages(void)
 }
 #endif /* CONFIG_PM */
 
-static void zone_statistics(struct zonelist *zonelist, struct zone *z)
+static void zone_statistics(struct zonelist *zonelist, struct zone *z, int cpu)
 {
 #ifdef CONFIG_NUMA
-        unsigned long flags;
-        int cpu;
         pg_data_t *pg = z->zone_pgdat;
         pg_data_t *orig = zonelist->zones[0]->zone_pgdat;
         struct per_cpu_pageset *p;
 
-        local_irq_save(flags);
-        cpu = smp_processor_id();
-        p = zone_pcp(z,cpu);
+        p = zone_pcp(z, cpu);
         if (pg == orig) {
                 p->numa_hit++;
         } else {
@@ -669,14 +696,12 @@ static void zone_statistics(struct zonelist *zonelist, struct zone *z)
                 p->local_node++;
         else
                 p->other_node++;
-        local_irq_restore(flags);
 #endif
 }
 
 /*
  * Free a 0-order page
  */
-static void FASTCALL(free_hot_cold_page(struct page *page, int cold));
 static void fastcall free_hot_cold_page(struct page *page, int cold)
 {
         struct zone *zone = page_zone(page);
@@ -687,18 +712,20 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
 
         if (PageAnon(page))
                 page->mapping = NULL;
-        if (free_pages_check(__FUNCTION__, page))
+        if (free_pages_check(page))
                 return;
 
-        inc_page_state(pgfree);
         kernel_map_pages(page, 1, 0);
 
         pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
         local_irq_save(flags);
+        __inc_page_state(pgfree);
         list_add(&page->lru, &pcp->list);
         pcp->count++;
-        if (pcp->count >= pcp->high)
-                pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+        if (pcp->count >= pcp->high) {
+                free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+                pcp->count -= pcp->batch;
+        }
         local_irq_restore(flags);
         put_cpu();
 }
@@ -727,49 +754,58 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
  * we cheat by calling it from here, in the order > 0 path. Saves a branch
  * or two.
  */
-static struct page *
-buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
+static struct page *buffered_rmqueue(struct zonelist *zonelist,
+                        struct zone *zone, int order, gfp_t gfp_flags)
 {
         unsigned long flags;
         struct page *page;
         int cold = !!(gfp_flags & __GFP_COLD);
+        int cpu;
 
 again:
-        if (order == 0) {
+        cpu = get_cpu();
+        if (likely(order == 0)) {
                 struct per_cpu_pages *pcp;
 
-                page = NULL;
-                pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+                pcp = &zone_pcp(zone, cpu)->pcp[cold];
                 local_irq_save(flags);
-                if (pcp->count <= pcp->low)
+                if (!pcp->count) {
                         pcp->count += rmqueue_bulk(zone, 0,
                                         pcp->batch, &pcp->list);
-                if (pcp->count) {
-                        page = list_entry(pcp->list.next, struct page, lru);
-                        list_del(&page->lru);
-                        pcp->count--;
+                        if (unlikely(!pcp->count))
+                                goto failed;
                 }
-                local_irq_restore(flags);
-                put_cpu();
+                page = list_entry(pcp->list.next, struct page, lru);
+                list_del(&page->lru);
+                pcp->count--;
         } else {
                 spin_lock_irqsave(&zone->lock, flags);
                 page = __rmqueue(zone, order);
-                spin_unlock_irqrestore(&zone->lock, flags);
+                spin_unlock(&zone->lock);
+                if (!page)
+                        goto failed;
         }
 
-        if (page != NULL) {
-                BUG_ON(bad_range(zone, page));
-                mod_page_state_zone(zone, pgalloc, 1 << order);
-                if (prep_new_page(page, order))
-                        goto again;
+        __mod_page_state_zone(zone, pgalloc, 1 << order);
+        zone_statistics(zonelist, zone, cpu);
+        local_irq_restore(flags);
+        put_cpu();
 
-                if (gfp_flags & __GFP_ZERO)
-                        prep_zero_page(page, order, gfp_flags);
+        BUG_ON(bad_range(zone, page));
+        if (prep_new_page(page, order))
+                goto again;
 
-                if (order && (gfp_flags & __GFP_COMP))
-                        prep_compound_page(page, order);
-        }
+        if (gfp_flags & __GFP_ZERO)
+                prep_zero_page(page, order, gfp_flags);
+
+        if (order && (gfp_flags & __GFP_COMP))
+                prep_compound_page(page, order);
         return page;
+
+failed:
+        local_irq_restore(flags);
+        put_cpu();
+        return NULL;
 }
 
 #define ALLOC_NO_WATERMARKS	0x01 /* don't check watermarks at all */
@@ -845,9 +881,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
                         continue;
                 }
 
-                page = buffered_rmqueue(*z, order, gfp_mask);
+                page = buffered_rmqueue(zonelist, *z, order, gfp_mask);
                 if (page) {
-                        zone_statistics(zonelist, *z);
                         break;
                 }
         } while (*(++z) != NULL);
@@ -896,15 +931,15 @@ restart:
          *
          * The caller may dip into page reserves a bit more if the caller
          * cannot run direct reclaim, or if the caller has realtime scheduling
-         * policy.
+         * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
+         * set both ALLOC_HARDER (!wait) and ALLOC_HIGH (__GFP_HIGH).
          */
         alloc_flags = ALLOC_WMARK_MIN;
         if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait)
                 alloc_flags |= ALLOC_HARDER;
         if (gfp_mask & __GFP_HIGH)
                 alloc_flags |= ALLOC_HIGH;
-        if (wait)
-                alloc_flags |= ALLOC_CPUSET;
+        alloc_flags |= ALLOC_CPUSET;
 
         /*
          * Go through the zonelist again. Let __GFP_HIGH and allocations
@@ -926,7 +961,7 @@ restart:
 nofail_alloc:
                         /* go through the zonelist yet again, ignoring mins */
                         page = get_page_from_freelist(gfp_mask, order,
-                                zonelist, ALLOC_NO_WATERMARKS|ALLOC_CPUSET);
+                                zonelist, ALLOC_NO_WATERMARKS);
                         if (page)
                                 goto got_pg;
                         if (gfp_mask & __GFP_NOFAIL) {
@@ -945,6 +980,7 @@ rebalance:
         cond_resched();
 
         /* We now go into synchronous reclaim */
+        cpuset_memory_pressure_bump();
         p->flags |= PF_MEMALLOC;
         reclaim_state.reclaimed_slab = 0;
         p->reclaim_state = &reclaim_state;
@@ -1171,7 +1207,7 @@ EXPORT_SYMBOL(nr_pagecache);
 DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
 #endif
 
-void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
+static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
 {
         int cpu = 0;
 
@@ -1224,7 +1260,7 @@ void get_full_page_state(struct page_state *ret)
         __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
 }
 
-unsigned long __read_page_state(unsigned long offset)
+unsigned long read_page_state_offset(unsigned long offset)
 {
         unsigned long ret = 0;
         int cpu;
@@ -1238,18 +1274,26 @@ unsigned long __read_page_state(unsigned long offset)
         return ret;
 }
 
-void __mod_page_state(unsigned long offset, unsigned long delta)
+void __mod_page_state_offset(unsigned long offset, unsigned long delta)
+{
+        void *ptr;
+
+        ptr = &__get_cpu_var(page_states);
+        *(unsigned long *)(ptr + offset) += delta;
+}
+EXPORT_SYMBOL(__mod_page_state_offset);
+
+void mod_page_state_offset(unsigned long offset, unsigned long delta)
 {
         unsigned long flags;
-        void* ptr;
+        void *ptr;
 
         local_irq_save(flags);
         ptr = &__get_cpu_var(page_states);
-        *(unsigned long*)(ptr + offset) += delta;
+        *(unsigned long *)(ptr + offset) += delta;
         local_irq_restore(flags);
 }
-
-EXPORT_SYMBOL(__mod_page_state);
+EXPORT_SYMBOL(mod_page_state_offset);
 
 void __get_zone_counts(unsigned long *active, unsigned long *inactive,
                         unsigned long *free, struct pglist_data *pgdat)
@@ -1335,7 +1379,7 @@ void show_free_areas(void)
                 show_node(zone);
                 printk("%s per-cpu:", zone->name);
 
-                if (!zone->present_pages) {
+                if (!populated_zone(zone)) {
                         printk(" empty\n");
                         continue;
                 } else
@@ -1347,10 +1391,9 @@ void show_free_areas(void)
                         pageset = zone_pcp(zone, cpu);
 
                         for (temperature = 0; temperature < 2; temperature++)
-                                printk("cpu %d %s: low %d, high %d, batch %d used:%d\n",
+                                printk("cpu %d %s: high %d, batch %d used:%d\n",
                                         cpu,
                                         temperature ? "cold" : "hot",
-                                        pageset->pcp[temperature].low,
                                         pageset->pcp[temperature].high,
                                         pageset->pcp[temperature].batch,
                                         pageset->pcp[temperature].count);
@@ -1413,7 +1456,7 @@ void show_free_areas(void)
 
                 show_node(zone);
                 printk("%s: ", zone->name);
-                if (!zone->present_pages) {
+                if (!populated_zone(zone)) {
                         printk("empty\n");
                         continue;
                 }
@@ -1433,36 +1476,29 @@ void show_free_areas(void)
 
 /*
  * Builds allocation fallback zone lists.
+ *
+ * Add all populated zones of a node to the zonelist.
  */
-static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, int j, int k)
-{
-        switch (k) {
+static int __init build_zonelists_node(pg_data_t *pgdat,
+                struct zonelist *zonelist, int nr_zones, int zone_type)
+{
         struct zone *zone;
-        default:
-                BUG();
-        case ZONE_HIGHMEM:
-                zone = pgdat->node_zones + ZONE_HIGHMEM;
-                if (zone->present_pages) {
+
+        BUG_ON(zone_type > ZONE_HIGHMEM);
+
+        do {
+                zone = pgdat->node_zones + zone_type;
+                if (populated_zone(zone)) {
 #ifndef CONFIG_HIGHMEM
-                        BUG();
+                        BUG_ON(zone_type > ZONE_NORMAL);
 #endif
-                        zonelist->zones[j++] = zone;
+                        zonelist->zones[nr_zones++] = zone;
+                        check_highest_zone(zone_type);
                 }
-        case ZONE_NORMAL:
-                zone = pgdat->node_zones + ZONE_NORMAL;
-                if (zone->present_pages)
-                        zonelist->zones[j++] = zone;
-        case ZONE_DMA32:
-                zone = pgdat->node_zones + ZONE_DMA32;
-                if (zone->present_pages)
-                        zonelist->zones[j++] = zone;
-        case ZONE_DMA:
-                zone = pgdat->node_zones + ZONE_DMA;
-                if (zone->present_pages)
-                        zonelist->zones[j++] = zone;
-        }
+                zone_type--;
 
-        return j;
+        } while (zone_type >= 0);
+        return nr_zones;
 }
 
 static inline int highest_zone(int zone_bits)
@@ -1706,11 +1742,9 @@ void __devinit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
         unsigned long end_pfn = start_pfn + size;
         unsigned long pfn;
 
-        for (pfn = start_pfn; pfn < end_pfn; pfn++, page++) {
+        for (pfn = start_pfn; pfn < end_pfn; pfn++) {
                 if (!early_pfn_valid(pfn))
                         continue;
-                if (!early_pfn_in_nid(pfn, nid))
-                        continue;
                 page = pfn_to_page(pfn);
                 set_page_links(page, zone, nid, pfn);
                 set_page_count(page, 1);
@@ -1794,19 +1828,35 @@ inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 
         pcp = &p->pcp[0];		/* hot */
         pcp->count = 0;
-        pcp->low = 0;
         pcp->high = 6 * batch;
         pcp->batch = max(1UL, 1 * batch);
         INIT_LIST_HEAD(&pcp->list);
 
         pcp = &p->pcp[1];		/* cold*/
         pcp->count = 0;
-        pcp->low = 0;
         pcp->high = 2 * batch;
         pcp->batch = max(1UL, batch/2);
         INIT_LIST_HEAD(&pcp->list);
 }
 
+/*
+ * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist
+ * to the value high for the pageset p.
+ */
+
+static void setup_pagelist_highmark(struct per_cpu_pageset *p,
+                                unsigned long high)
+{
+        struct per_cpu_pages *pcp;
+
+        pcp = &p->pcp[0]; /* hot list */
+        pcp->high = high;
+        pcp->batch = max(1UL, high/4);
+        if ((high/4) > (PAGE_SHIFT * 8))
+                pcp->batch = PAGE_SHIFT * 8;
+}
+
+
 #ifdef CONFIG_NUMA
 /*
  * Boot pageset table. One per cpu which is going to be used for all
@@ -1838,12 +1888,16 @@ static int __devinit process_zones(int cpu)
 
         for_each_zone(zone) {
 
-                zone->pageset[cpu] = kmalloc_node(sizeof(struct per_cpu_pageset),
+                zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
                                          GFP_KERNEL, cpu_to_node(cpu));
-                if (!zone->pageset[cpu])
+                if (!zone_pcp(zone, cpu))
                         goto bad;
 
-                setup_pageset(zone->pageset[cpu], zone_batchsize(zone));
+                setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));
+
+                if (percpu_pagelist_fraction)
+                        setup_pagelist_highmark(zone_pcp(zone, cpu),
+                                (zone->present_pages / percpu_pagelist_fraction));
         }
 
         return 0;
@@ -1851,15 +1905,14 @@ bad:
         for_each_zone(dzone) {
                 if (dzone == zone)
                         break;
-                kfree(dzone->pageset[cpu]);
-                dzone->pageset[cpu] = NULL;
+                kfree(zone_pcp(dzone, cpu));
+                zone_pcp(dzone, cpu) = NULL;
         }
         return -ENOMEM;
 }
 
 static inline void free_zone_pagesets(int cpu)
 {
-#ifdef CONFIG_NUMA
         struct zone *zone;
 
         for_each_zone(zone) {
@@ -1868,7 +1921,6 @@ static inline void free_zone_pagesets(int cpu)
                 zone_pcp(zone, cpu) = NULL;
                 kfree(pset);
         }
-#endif
 }
 
 static int __devinit pageset_cpuup_callback(struct notifier_block *nfb,
@@ -1939,7 +1991,7 @@ static __devinit void zone_pcp_init(struct zone *zone)
         for (cpu = 0; cpu < NR_CPUS; cpu++) {
 #ifdef CONFIG_NUMA
                 /* Early boot. Slab allocator not functional yet */
-                zone->pageset[cpu] = &boot_pageset[cpu];
+                zone_pcp(zone, cpu) = &boot_pageset[cpu];
                 setup_pageset(&boot_pageset[cpu],0);
 #else
                 setup_pageset(zone_pcp(zone,cpu), batch);
@@ -2116,7 +2168,7 @@ static int frag_show(struct seq_file *m, void *arg)
         int order;
 
         for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
-                if (!zone->present_pages)
+                if (!populated_zone(zone))
                         continue;
 
                 spin_lock_irqsave(&zone->lock, flags);
@@ -2149,7 +2201,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
         for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
                 int i;
 
-                if (!zone->present_pages)
+                if (!populated_zone(zone))
                         continue;
 
                 spin_lock_irqsave(&zone->lock, flags);
@@ -2182,7 +2234,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
                 seq_printf(m,
                            ")"
                            "\n        pagesets");
-                for (i = 0; i < ARRAY_SIZE(zone->pageset); i++) {
+                for_each_online_cpu(i) {
                         struct per_cpu_pageset *pageset;
                         int j;
 
@@ -2197,12 +2249,10 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
                         seq_printf(m,
                                    "\n    cpu: %i pcp: %i"
                                    "\n              count: %i"
-                                   "\n              low:   %i"
                                    "\n              high:  %i"
                                    "\n              batch: %i",
                                    i, j,
                                    pageset->pcp[j].count,
-                                   pageset->pcp[j].low,
                                    pageset->pcp[j].high,
                                    pageset->pcp[j].batch);
                 }
@@ -2257,32 +2307,40 @@ static char *vmstat_text[] = {
         "pgpgout",
         "pswpin",
         "pswpout",
-        "pgalloc_high",
 
+        "pgalloc_high",
         "pgalloc_normal",
+        "pgalloc_dma32",
         "pgalloc_dma",
+
         "pgfree",
         "pgactivate",
         "pgdeactivate",
 
         "pgfault",
         "pgmajfault",
+
         "pgrefill_high",
         "pgrefill_normal",
+        "pgrefill_dma32",
         "pgrefill_dma",
 
         "pgsteal_high",
         "pgsteal_normal",
+        "pgsteal_dma32",
         "pgsteal_dma",
+
         "pgscan_kswapd_high",
         "pgscan_kswapd_normal",
-
+        "pgscan_kswapd_dma32",
         "pgscan_kswapd_dma",
+
         "pgscan_direct_high",
         "pgscan_direct_normal",
+        "pgscan_direct_dma32",
         "pgscan_direct_dma",
-        "pginodesteal",
 
+        "pginodesteal",
         "slabs_scanned",
         "kswapd_steal",
         "kswapd_inodesteal",
@@ -2539,6 +2597,32 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
         return 0;
 }
 
+/*
+ * percpu_pagelist_fraction - changes the pcp->high for each zone on each
+ * cpu. It is the fraction of total pages in each zone that a hot per cpu
+ * pagelist can have before it gets flushed back to buddy allocator.
+ */
+
+int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
+        struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+        struct zone *zone;
+        unsigned int cpu;
+        int ret;
+
+        ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+        if (!write || (ret == -EINVAL))
+                return ret;
+        for_each_zone(zone) {
+                for_each_online_cpu(cpu) {
+                        unsigned long high;
+                        high = zone->present_pages / percpu_pagelist_fraction;
+                        setup_pagelist_highmark(zone_pcp(zone, cpu), high);
+                }
+        }
+        return 0;
+}
+
 __initdata int hashdist = HASHDIST_DEFAULT;
 
 #ifdef CONFIG_NUMA