author     Hideo AOKI <haoki@redhat.com>           2006-04-11 01:52:59 -0400
committer  Linus Torvalds <torvalds@g5.osdl.org>  2006-04-11 09:18:32 -0400
commit     cb45b0e966cbe747b6189c15b108901cc7d6c97c (patch)
tree       0402d4809ec175e80b083f7a713ec32c0109baad /mm/page_alloc.c
parent     e23ca00bf1b1c6c0f04702cb4d29e275ab8dc330 (diff)
[PATCH] overcommit: add calculate_totalreserve_pages()
These patches are an enhancement of the OVERCOMMIT_GUESS algorithm in
__vm_enough_memory().

- Why the kernel needed patching

When the kernel cannot allocate anonymous pages in practice, the current
OVERCOMMIT_GUESS algorithm can still return success.  This behavior can
cause OOM kills under memory pressure.  If Linux runs with page
reservation features such as /proc/sys/vm/lowmem_reserve_ratio and
without a swap region, I think OOM kills occur easily.

- The overall design approach in the patch

When the OVERCOMMIT_GUESS algorithm calculates the number of free pages,
the reserved free pages are regarded as non-free.  This change avoids the
pitfall of the free-page count dropping below the number of pages the
kernel tries to keep free.

- Testing results

I tested the patches using my test kernel module.  Without the patches,
__vm_enough_memory() returns success in this situation even though the
actual page allocation fails.  With the patches applied, the allocation
failure is avoided because __vm_enough_memory() returns failure.  I
verified this on an i386 SMP machine with 16GB of memory.  I have not
tested a nommu environment yet.

This patch adds totalreserve_pages for __vm_enough_memory().
calculate_totalreserve_pages() finds, for each zone, the maximum
lowmem_reserve value, adds pages_high, and stores the sum over all zones
in totalreserve_pages.  totalreserve_pages is calculated when the VM is
initialized, and the variable is updated whenever
/proc/sys/vm/lowmem_reserve_ratio or /proc/sys/vm/min_free_kbytes
changes.

Signed-off-by: Hideo Aoki <haoki@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
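To see the arithmetic of the patch in isolation, here is a minimal
standalone C sketch of the reserve accounting described above.  The
struct zone layout, the zone sizes, and the free-page figure are invented
stand-ins for the real 2.6-era kernel structures, and the guess-style
check at the end only illustrates how subtracting totalreserve_pages
tightens the heuristic; it is not the actual __vm_enough_memory() code.

/* Illustrative userspace sketch only -- not kernel code.  The zone
 * struct, the sample numbers, and the final check are simplified
 * stand-ins for their kernel counterparts. */
#include <stdio.h>

#define MAX_NR_ZONES 3	/* e.g. DMA, NORMAL, HIGHMEM on i386 */

struct zone {
	unsigned long present_pages;
	unsigned long pages_high;
	unsigned long lowmem_reserve[MAX_NR_ZONES];
};

/* Mirrors the patch: for each zone, take the largest lowmem_reserve
 * entry, add pages_high, and cap the result at present_pages. */
static unsigned long calculate_totalreserve_pages(struct zone *zones)
{
	unsigned long reserve_pages = 0;
	int i, j;

	for (i = 0; i < MAX_NR_ZONES; i++) {
		struct zone *zone = &zones[i];
		unsigned long max = 0;

		for (j = i; j < MAX_NR_ZONES; j++) {
			if (zone->lowmem_reserve[j] > max)
				max = zone->lowmem_reserve[j];
		}
		max += zone->pages_high;
		if (max > zone->present_pages)
			max = zone->present_pages;
		reserve_pages += max;
	}
	return reserve_pages;
}

int main(void)
{
	/* Hypothetical zone sizes, in pages. */
	struct zone zones[MAX_NR_ZONES] = {
		{ .present_pages = 4096,   .pages_high = 96,
		  .lowmem_reserve = { 0, 1024, 1024 } },
		{ .present_pages = 225280, .pages_high = 384,
		  .lowmem_reserve = { 0, 0, 56320 } },
		{ .present_pages = 32768,  .pages_high = 128,
		  .lowmem_reserve = { 0, 0, 0 } },
	};
	unsigned long free_pages = 60000;	/* pretend free-page count */
	unsigned long totalreserve = calculate_totalreserve_pages(zones);

	printf("totalreserve_pages = %lu\n", totalreserve);

	/* The point of the patch: a GUESS-style heuristic should not
	 * count reserved pages as allocatable. */
	printf("pages usable by the guess heuristic: %lu\n",
	       free_pages > totalreserve ? free_pages - totalreserve : 0);
	return 0;
}

With the sample numbers above, the DMA zone contributes 1024 + 96 = 1120
pages, the NORMAL zone 56320 + 384 = 56704, and the HIGHMEM zone 128, so
57952 of the 60000 notionally free pages are treated as reserved.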
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	39
1 file changed, 39 insertions(+), 0 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b8165e037dee..97d6827c7d66 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -51,6 +51,7 @@ nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
 EXPORT_SYMBOL(node_possible_map);
 unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
+unsigned long totalreserve_pages __read_mostly;
 long nr_swap_pages;
 int percpu_pagelist_fraction;
 
@@ -2477,6 +2478,38 @@ void __init page_alloc_init(void)
 }
 
 /*
+ * calculate_totalreserve_pages - called when sysctl_lower_zone_reserve_ratio
+ *	or min_free_kbytes changes.
+ */
+static void calculate_totalreserve_pages(void)
+{
+	struct pglist_data *pgdat;
+	unsigned long reserve_pages = 0;
+	int i, j;
+
+	for_each_online_pgdat(pgdat) {
+		for (i = 0; i < MAX_NR_ZONES; i++) {
+			struct zone *zone = pgdat->node_zones + i;
+			unsigned long max = 0;
+
+			/* Find valid and maximum lowmem_reserve in the zone */
+			for (j = i; j < MAX_NR_ZONES; j++) {
+				if (zone->lowmem_reserve[j] > max)
+					max = zone->lowmem_reserve[j];
+			}
+
+			/* we treat pages_high as reserved pages. */
+			max += zone->pages_high;
+
+			if (max > zone->present_pages)
+				max = zone->present_pages;
+			reserve_pages += max;
+		}
+	}
+	totalreserve_pages = reserve_pages;
+}
+
+/*
  * setup_per_zone_lowmem_reserve - called whenever
  *	sysctl_lower_zone_reserve_ratio changes.  Ensures that each zone
  *	has a correct pages reserved value, so an adequate number of
@@ -2507,6 +2540,9 @@ static void setup_per_zone_lowmem_reserve(void)
 			}
 		}
 	}
+
+	/* update totalreserve_pages */
+	calculate_totalreserve_pages();
 }
 
 /*
@@ -2561,6 +2597,9 @@ void setup_per_zone_pages_min(void)
 		zone->pages_high = zone->pages_min + tmp / 2;
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
+
+	/* update totalreserve_pages */
+	calculate_totalreserve_pages();
 }
 
 /*